translate.c revision 6bfc5c725a5d9b23eb829a9db3c0f0a4c38b5a4e
1/*
2 *  ARM translation
3 *
4 *  Copyright (c) 2003 Fabrice Bellard
5 *  Copyright (c) 2005-2007 CodeSourcery
6 *  Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21#include <stdarg.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <inttypes.h>
26
27#include "cpu.h"
28#include "exec-all.h"
29#include "disas.h"
30#include "tcg-op.h"
31#include "qemu-log.h"
32
33#ifdef CONFIG_TRACE
34#include "trace.h"
35#endif
36
37#include "helpers.h"
38#define GEN_HELPER 1
39#include "helpers.h"
40
/* Architecture feature gates.  Each expands to a runtime test of the
   emulated CPU's feature bits; `env` must be in scope at the use site.
   ENABLE_ARCH_5J is hard-wired off (Jazelle not implemented).  */
#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K   arm_feature(env, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)

/* Abort decoding as an illegal opcode if the CPU lacks the feature.
   Relies on an `illegal_op` label in the enclosing decoder function.  */
#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
48
/* internal defines */

/* Per-translation-block decoder state, threaded through the code
   generators while one TB is being translated.  */
typedef struct DisasContext {
    target_ulong pc;        /* guest address of the insn being decoded */
    int is_jmp;             /* DISAS_* code: how/why translation ends */
    /* Nonzero if this instruction has been conditionally skipped.  */
    int condjmp;
    /* The label that will be jumped to when the instruction is skipped.  */
    int condlabel;
    /* Thumb-2 conditional execution bits.  */
    int condexec_mask;
    int condexec_cond;
    int condexec_mask_prev;  /* mask at start of instruction/block */
    struct TranslationBlock *tb;
    int singlestep_enabled;
    int thumb;              /* nonzero when decoding Thumb instructions */
#if !defined(CONFIG_USER_ONLY)
    int user;               /* nonzero when translating unprivileged code */
#endif
#ifdef CONFIG_MEMCHECK
    /* NOTE(review): presumably set while retranslating to recover a guest
       PC for memcheck callback injection — confirm against the users of
       this field elsewhere in the file.  */
    int search_pc;
#endif  // CONFIG_MEMCHECK
} DisasContext;
71
72#if defined(CONFIG_USER_ONLY)
73#define IS_USER(s) 1
74#else
75#define IS_USER(s) (s->user)
76#endif
77
78#ifdef CONFIG_TRACE
79#include "helpers.h"
80#endif /* CONFIG_TRACE */
81
82#ifdef CONFIG_MEMCHECK
83/*
84 * Memchecker addition in this module is intended to inject qemu callback into
85 * translated code for each BL/BLX, as well as BL/BLX returns. These callbacks
86 * are used to build calling stack of the thread in order to provide better
87 * reporting on memory access violations. Although this may seem as something
 * that may greatly impact performance, in reality it doesn't. The overhead
 * added by setting up callbacks, and by the callbacks themselves, is negligible.
90 * On the other hand, maintaining calling stack can indeed add some perf.
91 * overhead (TODO: provide solid numbers here).
92 * One of the things to watch out with regards to injecting callbacks, is
93 * consistency between intermediate code generated for execution, and for guest
94 * PC address calculation. If code doesn't match, a segmentation fault is
95 * guaranteed.
96 */
97
98#include "memcheck/memcheck_proc_management.h"
99#include "memcheck_arm_helpers.h"
100#endif  // CONFIG_MEMCHECK
101
/* These instructions trap after executing, so defer them until after the
   conditional executions state has been updated.  */
#define DISAS_WFI 4
#define DISAS_SWI 5

static TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;

/* FIXME:  These should be removed.  */
/* Legacy T0/T1 shadow registers used by the gen_op_* macros below.  */
static TCGv cpu_T[2];
static TCGv cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

/* gen-icount.h uses ICOUNT_TEMP as its scratch register.  */
#define ICOUNT_TEMP cpu_T[0]
#include "gen-icount.h"
118
/* initialize TCG globals.  */
void arm_translate_init(void)
{
    /* env lives permanently in host register AREG0; the T0/T1 shadow
       registers live in AREG1/AREG2.  */
    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");

    cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
    cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");

    /* Expanding helpers.h with GEN_HELPER == 2 registers every helper
       function with TCG (see the GEN_HELPER dispatch in helpers.h).  */
#define GEN_HELPER 2
#include "helpers.h"
}
130
131/* The code generator doesn't like lots of temporaries, so maintain our own
132   cache for reuse within a function.  */
133#define MAX_TEMPS 8
134static int num_temps;
135static TCGv temps[MAX_TEMPS];
136
137/* Allocate a temporary variable.  */
138static TCGv_i32 new_tmp(void)
139{
140    TCGv tmp;
141    if (num_temps == MAX_TEMPS)
142        abort();
143
144    if (GET_TCGV_I32(temps[num_temps]))
145      return temps[num_temps++];
146
147    tmp = tcg_temp_new_i32();
148    temps[num_temps++] = tmp;
149    return tmp;
150}
151
/* Release a temporary variable.  */
static void dead_tmp(TCGv tmp)
{
    int i;
    /* Invariant: live temps occupy temps[0..num_temps-1]; slots at or
       beyond num_temps hold already-created temps that new_tmp() may
       hand out again.  */
    num_temps--;
    i = num_temps;
    if (TCGV_EQUAL(temps[i], tmp))
        return;  /* released in LIFO order: already in the free position */

    /* Shuffle this temp to the last slot.  */
    while (!TCGV_EQUAL(temps[i], tmp))
        i--;
    while (i < num_temps) {
        temps[i] = temps[i + 1];
        i++;
    }
    temps[i] = tmp;
}
170
171static inline TCGv load_cpu_offset(int offset)
172{
173    TCGv tmp = new_tmp();
174    tcg_gen_ld_i32(tmp, cpu_env, offset);
175    return tmp;
176}
177
178#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))
179
180static inline void store_cpu_offset(TCGv var, int offset)
181{
182    tcg_gen_st_i32(var, cpu_env, offset);
183    dead_tmp(var);
184}
185
186#define store_cpu_field(var, name) \
187    store_cpu_offset(var, offsetof(CPUState, name))
188
/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn:
           s->pc already points past the current instruction, so reading
           r15 yields pc + 2 (Thumb) or pc + 4 (ARM).  */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    }
}
204
205/* Create a new temporary and set it to the value of a CPU register.  */
206static inline TCGv load_reg(DisasContext *s, int reg)
207{
208    TCGv tmp = new_tmp();
209    load_reg_var(s, tmp, reg);
210    return tmp;
211}
212
213/* Set a CPU register.  The source must be a temporary and will be
214   marked as dead.  */
215static void store_reg(DisasContext *s, int reg, TCGv var)
216{
217    if (reg == 15) {
218        tcg_gen_andi_i32(var, var, ~1);
219        s->is_jmp = DISAS_JUMP;
220    }
221    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
222    dead_tmp(var);
223}
224
225
/* Basic operations.  */
/* Moves and arithmetic on the legacy T0/T1 shadow registers.  */
#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)

#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])

/* _cc variants also update the arithmetic flags through helper calls.  */
#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])

#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);

#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)

#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])

#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
267
268static void gen_exception(int excp)
269{
270    TCGv tmp = new_tmp();
271    tcg_gen_movi_i32(tmp, excp);
272    gen_helper_exception(tmp);
273    dead_tmp(tmp);
274}
275
276static void gen_smul_dual(TCGv a, TCGv b)
277{
278    TCGv tmp1 = new_tmp();
279    TCGv tmp2 = new_tmp();
280    tcg_gen_ext16s_i32(tmp1, a);
281    tcg_gen_ext16s_i32(tmp2, b);
282    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
283    dead_tmp(tmp2);
284    tcg_gen_sari_i32(a, a, 16);
285    tcg_gen_sari_i32(b, b, 16);
286    tcg_gen_mul_i32(b, b, a);
287    tcg_gen_mov_i32(a, tmp1);
288    dead_tmp(tmp1);
289}
290
291/* Byteswap each halfword.  */
292static void gen_rev16(TCGv var)
293{
294    TCGv tmp = new_tmp();
295    tcg_gen_shri_i32(tmp, var, 8);
296    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
297    tcg_gen_shli_i32(var, var, 8);
298    tcg_gen_andi_i32(var, var, 0xff00ff00);
299    tcg_gen_or_i32(var, var, tmp);
300    dead_tmp(tmp);
301}
302
303/* Byteswap low halfword and sign extend.  */
304static void gen_revsh(TCGv var)
305{
306    TCGv tmp = new_tmp();
307    tcg_gen_shri_i32(tmp, var, 8);
308    tcg_gen_andi_i32(tmp, tmp, 0x00ff);
309    tcg_gen_shli_i32(var, var, 8);
310    tcg_gen_ext8s_i32(var, var);
311    tcg_gen_or_i32(var, var, tmp);
312    dead_tmp(tmp);
313}
314
/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv var, int shift, uint32_t mask)
{
    /* var = (var >> shift) & mask; MASK is the already-positioned width mask. */
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
static void gen_sbfx(TCGv var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        /* Field does not reach bit 31 (the sari above did not already
           sign-extend it): mask to WIDTH bits, then sign-extend with the
           (x ^ s) - s trick, where s is the field's sign bit.  */
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}

/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
{
    /* dest = (base & ~(mask << shift)) | ((val & mask) << shift) */
    tcg_gen_andi_i32(val, val, mask);
    tcg_gen_shli_i32(val, val, shift);
    tcg_gen_andi_i32(base, base, ~(mask << shift));
    tcg_gen_or_i32(dest, base, val);
}

/* Round the top 32 bits of a 64-bit value.  */
static void gen_roundqd(TCGv a, TCGv b)
{
    /* a = b + (a >> 31): folds the top bit of the low word (a) into the
       high word (b) of a 64-bit value held in two 32-bit temps.  */
    tcg_gen_shri_i32(a, a, 31);
    tcg_gen_add_i32(a, a, b);
}
353
/* FIXME: Most targets have native widening multiplication.
   It would be good to use that instead of a full wide multiply.  */
/* 32x32->64 multiply.  Marks inputs as dead.  */
static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
{
    /* NOTE(review): tmp2 (and the returned tmp1) are never explicitly
       freed here — confirm whether i64 temps are reclaimed elsewhere.  */
    TCGv_i64 tmp1 = tcg_temp_new_i64();
    TCGv_i64 tmp2 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp1, a);
    dead_tmp(a);
    tcg_gen_extu_i32_i64(tmp2, b);
    dead_tmp(b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    return tmp1;
}

/* Signed 32x32->64 multiply.  Marks inputs as dead; returns the product.  */
static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
{
    TCGv_i64 tmp1 = tcg_temp_new_i64();
    TCGv_i64 tmp2 = tcg_temp_new_i64();

    tcg_gen_ext_i32_i64(tmp1, a);
    dead_tmp(a);
    tcg_gen_ext_i32_i64(tmp2, b);
    dead_tmp(b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    return tmp1;
}

/* Unsigned 32x32->64 multiply.  */
static void gen_op_mull_T0_T1(void)
{
    /* On return T0 holds the low 32 bits and T1 the high 32 bits.  */
    TCGv_i64 tmp1 = tcg_temp_new_i64();
    TCGv_i64 tmp2 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
    tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
}

/* Signed 32x32->64 multiply.  */
static void gen_imull(TCGv a, TCGv b)
{
    /* On return a holds the low 32 bits and b the high 32 bits.  */
    TCGv_i64 tmp1 = tcg_temp_new_i64();
    TCGv_i64 tmp2 = tcg_temp_new_i64();

    tcg_gen_ext_i32_i64(tmp1, a);
    tcg_gen_ext_i32_i64(tmp2, b);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(a, tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(b, tmp1);
}
#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
411
/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    dead_tmp(tmp);
}

/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv t0, TCGv t1)
{
    /* Clearing bit 15 of both addends keeps a carry out of the low
       halfword from corrupting the high halfword; the saved xor of the
       two bit-15s restores the correct bit afterwards.  */
    TCGv tmp = new_tmp();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    dead_tmp(tmp);
    dead_tmp(t1);
}
441
/* Store var (0 or 1 expected) into the unpacked carry flag.  */
#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv var)
{
    TCGv tmp = new_tmp();
    tcg_gen_shri_i32(tmp, var, 31);
    gen_set_CF(tmp);
    dead_tmp(tmp);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv var)
{
    /* NF/ZF store the result value itself; the sign and zero tests are
       performed when the flags are consumed (see gen_test_cc).  */
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NF));
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, ZF));
}
459
/* T0 += T1 + CF.  */
static void gen_adc_T0_T1(void)
{
    TCGv tmp;
    gen_op_addl_T0_T1();
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
    dead_tmp(tmp);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp;
    tcg_gen_add_i32(dest, t0, t1);
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(dest, dest, tmp);
    dead_tmp(tmp);
}

/* dest = T0 - T1 + CF - 1.  */
/* ARM subtract-with-carry: borrow is the inverse of CF.  */
static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp;
    tcg_gen_sub_i32(dest, t0, t1);
    tmp = load_cpu_field(CF);
    tcg_gen_add_i32(dest, dest, tmp);
    tcg_gen_subi_i32(dest, dest, 1);
    dead_tmp(tmp);
}

#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])

/* T0 &= ~T1.  Clobbers T1.  */
/* FIXME: Implement bic natively.  */
/* NOTE(review): this version computes through a temp and does not
   actually clobber t1; the "Clobbers T1" note applies to
   gen_op_bicl_T0_T1 below.  */
static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
{
    TCGv tmp = new_tmp();
    tcg_gen_not_i32(tmp, t1);
    tcg_gen_and_i32(dest, t0, tmp);
    dead_tmp(tmp);
}
/* T0 &= ~T1, destroying T1 in the process.  */
static inline void gen_op_bicl_T0_T1(void)
{
    gen_op_notl_T1();
    gen_op_andl_T0_T1();
}
508
/* FIXME:  Implement this natively.  */
#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)

/* FIXME:  Implement this natively.  */
/* t0 = t1 rotated right by i.  t1 is clobbered (shifted left in place).
   i == 0 is a no-op; values are presumably 0..31, since i == 32 would
   shift by the full word width — TODO confirm callers.  */
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
{
    TCGv tmp;

    if (i == 0)
        return;

    tmp = new_tmp();
    tcg_gen_shri_i32(tmp, t1, i);
    tcg_gen_shli_i32(t1, t1, 32 - i);
    tcg_gen_or_i32(t0, t1, tmp);
    dead_tmp(tmp);
}

/* Set CF to bit SHIFT of VAR (the barrel-shifter carry-out).  */
static void shifter_out_im(TCGv var, int shift)
{
    TCGv tmp = new_tmp();
    if (shift == 0) {
        tcg_gen_andi_i32(tmp, var, 1);
    } else {
        tcg_gen_shri_i32(tmp, var, shift);
        if (shift != 31)  /* after shifting by 31 only one bit remains */
            tcg_gen_andi_i32(tmp, tmp, 1);
    }
    gen_set_CF(tmp);
    dead_tmp(tmp);
}
540
541/* Shift by immediate.  Includes special handling for shift == 0.  */
542static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
543{
544    switch (shiftop) {
545    case 0: /* LSL */
546        if (shift != 0) {
547            if (flags)
548                shifter_out_im(var, 32 - shift);
549            tcg_gen_shli_i32(var, var, shift);
550        }
551        break;
552    case 1: /* LSR */
553        if (shift == 0) {
554            if (flags) {
555                tcg_gen_shri_i32(var, var, 31);
556                gen_set_CF(var);
557            }
558            tcg_gen_movi_i32(var, 0);
559        } else {
560            if (flags)
561                shifter_out_im(var, shift - 1);
562            tcg_gen_shri_i32(var, var, shift);
563        }
564        break;
565    case 2: /* ASR */
566        if (shift == 0)
567            shift = 32;
568        if (flags)
569            shifter_out_im(var, shift - 1);
570        if (shift == 32)
571          shift = 31;
572        tcg_gen_sari_i32(var, var, shift);
573        break;
574    case 3: /* ROR/RRX */
575        if (shift != 0) {
576            if (flags)
577                shifter_out_im(var, shift - 1);
578            tcg_gen_rori_i32(var, var, shift); break;
579        } else {
580            TCGv tmp = load_cpu_field(CF);
581            if (flags)
582                shifter_out_im(var, 0);
583            tcg_gen_shri_i32(var, var, 1);
584            tcg_gen_shli_i32(tmp, tmp, 31);
585            tcg_gen_or_i32(var, var, tmp);
586            dead_tmp(tmp);
587        }
588    }
589};
590
591static inline void gen_arm_shift_reg(TCGv var, int shiftop,
592                                     TCGv shift, int flags)
593{
594    if (flags) {
595        switch (shiftop) {
596        case 0: gen_helper_shl_cc(var, var, shift); break;
597        case 1: gen_helper_shr_cc(var, var, shift); break;
598        case 2: gen_helper_sar_cc(var, var, shift); break;
599        case 3: gen_helper_ror_cc(var, var, shift); break;
600        }
601    } else {
602        switch (shiftop) {
603        case 0: gen_helper_shl(var, var, shift); break;
604        case 1: gen_helper_shr(var, var, shift); break;
605        case 2: gen_helper_sar(var, var, shift); break;
606        case 3: gen_helper_ror(var, var, shift); break;
607        }
608    }
609    dead_tmp(shift);
610}
611
/* Map op2 to a parallel add/sub helper for the given prefix
   (s/u/q/sh/uq/uh); unlisted op2 values emit nothing.  */
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
/* Emit an ARM-encoding parallel add/subtract.  op1 selects the family
   (signed/unsigned GE-setting, saturating, halving); op2 the element
   operation.  The s/u families take a pointer to the GE flags, the
   others do not.  Unhandled op1/op2 combinations emit no code.  */
static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 5:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
655
/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
/* Same helper table as above but with the Thumb-2 op2 numbering.  */
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
/* Thumb-2 counterpart of gen_arm_parallel_addsub; only the op1/op2
   numbering differs from the ARM encoding.  */
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 4:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
700
/* Branch to LABEL when ARM condition code CC holds.  Flags live unpacked
   in CPUState: NF/ZF hold a value whose sign/zero-ness is the flag, CF is
   0 or 1, VF holds the overflow bit in bit 31.  Compound conditions use a
   local 'inv' label to skip the branch when the first test fails.  */
static void gen_test_cc(int cc, int label)
{
    TCGv tmp;
    TCGv tmp2;
    int inv;

    switch (cc) {
    case 0: /* eq: Z */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 1: /* ne: !Z */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        break;
    case 2: /* cs: C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        break;
    case 3: /* cc: !C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 4: /* mi: N */
        tmp = load_cpu_field(NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 5: /* pl: !N */
        tmp = load_cpu_field(NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 6: /* vs: V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 7: /* vc: !V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 8: /* hi: C && !Z */
        inv = gen_new_label();
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
    dead_tmp(tmp);
}
798
/* Indexed by the 4-bit data-processing opcode: nonzero for the logical
   ops, whose S-forms set N/Z from the result via gen_logic_CC rather
   than through an arithmetic flag helper.  */
static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};
817
/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    /* Only rewrite env->thumb when the interworking bit differs from the
       state we are currently decoding in.  */
    if (s->thumb != (addr & 1)) {
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
    }
    tcg_gen_movi_i32(tmp, addr & ~1);
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
    dead_tmp(tmp);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv var)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    /* Bit 0 of the target selects the instruction set.  */
    tcg_gen_andi_i32(tmp, var, 1);
    store_cpu_field(tmp, thumb);
    tcg_gen_andi_i32(var, var, ~1);
    store_cpu_field(var, regs[15]);
}

/* TODO: This should be removed.  Use gen_bx instead.  */
static inline void gen_bx_T0(DisasContext *s)
{
    TCGv tmp = new_tmp();
    tcg_gen_mov_i32(tmp, cpu_T[0]);
    gen_bx(s, tmp);
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(CPUState *env, DisasContext *s,
                                int reg, TCGv var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}
867
868static inline TCGv gen_ld8s(TCGv addr, int index)
869{
870    TCGv tmp = new_tmp();
871    tcg_gen_qemu_ld8s(tmp, addr, index);
872    return tmp;
873}
874static inline TCGv gen_ld8u(TCGv addr, int index)
875{
876    TCGv tmp = new_tmp();
877    tcg_gen_qemu_ld8u(tmp, addr, index);
878    return tmp;
879}
880static inline TCGv gen_ld16s(TCGv addr, int index)
881{
882    TCGv tmp = new_tmp();
883    tcg_gen_qemu_ld16s(tmp, addr, index);
884    return tmp;
885}
886static inline TCGv gen_ld16u(TCGv addr, int index)
887{
888    TCGv tmp = new_tmp();
889    tcg_gen_qemu_ld16u(tmp, addr, index);
890    return tmp;
891}
892static inline TCGv gen_ld32(TCGv addr, int index)
893{
894    TCGv tmp = new_tmp();
895    tcg_gen_qemu_ld32u(tmp, addr, index);
896    return tmp;
897}
898static inline void gen_st8(TCGv val, TCGv addr, int index)
899{
900    tcg_gen_qemu_st8(val, addr, index);
901    dead_tmp(val);
902}
903static inline void gen_st16(TCGv val, TCGv addr, int index)
904{
905    tcg_gen_qemu_st16(val, addr, index);
906    dead_tmp(val);
907}
908static inline void gen_st32(TCGv val, TCGv addr, int index)
909{
910    tcg_gen_qemu_st32(val, addr, index);
911    dead_tmp(val);
912}
913
/* Load guest register REG into shadow register T0.  */
static inline void gen_movl_T0_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[0], reg);
}

/* Load guest register REG into shadow register T1.  */
static inline void gen_movl_T1_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[1], reg);
}

static inline void gen_movl_T2_reg(DisasContext *s, int reg)
{
    /* NOTE(review): cpu_T is declared above with only two elements and
       arm_translate_init() creates only T0/T1, so cpu_T[2] indexes past
       the end of the array — confirm whether cpu_T should be larger or
       this helper is unused.  */
    load_reg_var(s, cpu_T[2], reg);
}

/* Store the immediate VAL into the guest PC.  */
static inline void gen_set_pc_im(uint32_t val)
{
    TCGv tmp = new_tmp();
    tcg_gen_movi_i32(tmp, val);
    store_cpu_field(tmp, regs[15]);
}

/* Store shadow register T<t> into guest register REG.  A store to r15
   clears bit 0 and ends the TB.  */
static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
{
    TCGv tmp;
    if (reg == 15) {
        tmp = new_tmp();
        tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
    } else {
        tmp = cpu_T[t];
    }
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
    if (reg == 15) {
        dead_tmp(tmp);
        s->is_jmp = DISAS_JUMP;
    }
}

static inline void gen_movl_reg_T0(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 0);
}

static inline void gen_movl_reg_T1(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 1);
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    gen_op_movl_T0_im(s->pc);
    gen_movl_reg_T0(s, 15);
    s->is_jmp = DISAS_UPDATE;
}
969
970static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
971                                       TCGv var)
972{
973    int val, rm, shift, shiftop;
974    TCGv offset;
975
976    if (!(insn & (1 << 25))) {
977        /* immediate */
978        val = insn & 0xfff;
979        if (!(insn & (1 << 23)))
980            val = -val;
981        if (val != 0)
982            tcg_gen_addi_i32(var, var, val);
983    } else {
984        /* shift/register */
985        rm = (insn) & 0xf;
986        shift = (insn >> 7) & 0x1f;
987        shiftop = (insn >> 5) & 3;
988        offset = load_reg(s, rm);
989        gen_arm_shift_im(offset, shiftop, shift, 0);
990        if (!(insn & (1 << 23)))
991            tcg_gen_sub_i32(var, var, offset);
992        else
993            tcg_gen_add_i32(var, var, offset);
994        dead_tmp(offset);
995    }
996}
997
998static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
999                                        int extra, TCGv var)
1000{
1001    int val, rm;
1002    TCGv offset;
1003
1004    if (insn & (1 << 22)) {
1005        /* immediate */
1006        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1007        if (!(insn & (1 << 23)))
1008            val = -val;
1009        val += extra;
1010        if (val != 0)
1011            tcg_gen_addi_i32(var, var, val);
1012    } else {
1013        /* register */
1014        if (extra)
1015            tcg_gen_addi_i32(var, var, extra);
1016        rm = (insn) & 0xf;
1017        offset = load_reg(s, rm);
1018        if (!(insn & (1 << 23)))
1019            tcg_gen_sub_i32(var, var, offset);
1020        else
1021            tcg_gen_add_i32(var, var, offset);
1022        dead_tmp(offset);
1023    }
1024}
1025
/* Emit a two-operand VFP arithmetic op on F0 (op) F1 -> F0, picking the
   double- or single-precision helper from DP.  */
#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    if (dp)                                                           \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \
    else                                                              \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

/* Single-operand ops on F0; abs/neg need no env (no FP exceptions).  */
static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

/* Compare F0 with F1 (cmpe: raise on quiet NaNs as well).  */
static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

/* Load zero into F1 (for compare-with-zero forms).  */
static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

/* Integer (in F0s) to float conversions.  */
static inline void gen_vfp_uito(int dp)
{
    if (dp)
        gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_sito(int dp)
{
    if (dp)
        gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
    else
        gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
}

/* Float to unsigned integer (result in F0s).  */
static inline void gen_vfp_toui(int dp)
{
    if (dp)
        gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
}
1113
1114static inline void gen_vfp_touiz(int dp)
1115{
1116    if (dp)
1117        gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
1118    else
1119        gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
1120}
1121
1122static inline void gen_vfp_tosi(int dp)
1123{
1124    if (dp)
1125        gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
1126    else
1127        gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
1128}
1129
1130static inline void gen_vfp_tosiz(int dp)
1131{
1132    if (dp)
1133        gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
1134    else
1135        gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
1136}
1137
1138#define VFP_GEN_FIX(name) \
1139static inline void gen_vfp_##name(int dp, int shift) \
1140{ \
1141    if (dp) \
1142        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\
1143    else \
1144        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\
1145}
1146VFP_GEN_FIX(tosh)
1147VFP_GEN_FIX(tosl)
1148VFP_GEN_FIX(touh)
1149VFP_GEN_FIX(toul)
1150VFP_GEN_FIX(shto)
1151VFP_GEN_FIX(slto)
1152VFP_GEN_FIX(uhto)
1153VFP_GEN_FIX(ulto)
1154#undef VFP_GEN_FIX
1155
1156static inline void gen_vfp_ld(DisasContext *s, int dp)
1157{
1158    if (dp)
1159        tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s));
1160    else
1161        tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s));
1162}
1163
1164static inline void gen_vfp_st(DisasContext *s, int dp)
1165{
1166    if (dp)
1167        tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s));
1168    else
1169        tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s));
1170}
1171
1172static inline long
1173vfp_reg_offset (int dp, int reg)
1174{
1175    if (dp)
1176        return offsetof(CPUARMState, vfp.regs[reg]);
1177    else if (reg & 1) {
1178        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1179          + offsetof(CPU_DoubleU, l.upper);
1180    } else {
1181        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1182          + offsetof(CPU_DoubleU, l.lower);
1183    }
1184}
1185
/* Return the CPUARMState offset of 32-bit piece N of NEON register REG;
   N == 0 is the least significant end.  A NEON (double) register is two
   single-precision slots, so delegate to vfp_reg_offset in sp mode.  */
static inline long
neon_reg_offset (int reg, int n)
{
    return vfp_reg_offset(0, reg * 2 + n);
}
1195
/* FIXME: Remove these.  */
/* Legacy aliases: NEON code still funnels values through the global
   cpu_T[] scratch registers.  */
#define neon_T0 cpu_T[0]
#define neon_T1 cpu_T[1]
/* Copy one 32-bit pass of a NEON register to/from a neon_T scratch.  */
#define NEON_GET_REG(T, reg, n) \
  tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
#define NEON_SET_REG(T, reg, n) \
  tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
1203
1204static TCGv neon_load_reg(int reg, int pass)
1205{
1206    TCGv tmp = new_tmp();
1207    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1208    return tmp;
1209}
1210
/* Store VAR into 32-bit pass PASS of NEON register REG.  Consumes VAR
   (frees the temp), so the caller must not use it afterwards.  */
static void neon_store_reg(int reg, int pass, TCGv var)
{
    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
    dead_tmp(var);
}
1216
/* Load the whole 64-bit NEON/VFP double register REG into VAR.  */
static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

/* Store VAR into the whole 64-bit NEON/VFP double register REG.
   Does not free VAR.  */
static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}
1226
/* Float load/store aliases: VFP values are moved as raw i32/i64 bit
   patterns; these names just document the intent at the call sites.  */
#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64
1231
1232static inline void gen_mov_F0_vreg(int dp, int reg)
1233{
1234    if (dp)
1235        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1236    else
1237        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1238}
1239
1240static inline void gen_mov_F1_vreg(int dp, int reg)
1241{
1242    if (dp)
1243        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1244    else
1245        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1246}
1247
1248static inline void gen_mov_vreg_F0(int dp, int reg)
1249{
1250    if (dp)
1251        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1252    else
1253        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1254}
1255
/* Coprocessor instruction L bit: set when the insn reads from the
   coprocessor into ARM registers (e.g. TMRRC/WLDR vs TMCRR/WSTR).  */
#define ARM_CP_RW_BIT	(1 << 20)
1257
/* Load 64-bit iwMMXt data register wRn into VAR.  */
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
}

/* Store VAR into 64-bit iwMMXt data register wRn.  Does not free VAR.  */
static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
}

/* Store T0 into 32-bit iwMMXt control register wCx.  */
static inline void gen_op_iwmmxt_movl_wCx_T0(int reg)
{
    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
}

/* Load 32-bit iwMMXt control register wCx into T0.  */
static inline void gen_op_iwmmxt_movl_T0_wCx(int reg)
{
    tcg_gen_ld_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
}

/* Load 32-bit iwMMXt control register wCx into T1.  */
static inline void gen_op_iwmmxt_movl_T1_wCx(int reg)
{
    tcg_gen_ld_i32(cpu_T[1], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
}
1282
/* Write the 64-bit accumulator M0 back to wRn.  */
static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

/* Load wRn into the 64-bit accumulator M0.  */
static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

/* M0 |= wRn (uses cpu_V1 as scratch).  */
static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

/* M0 &= wRn (uses cpu_V1 as scratch).  */
static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

/* M0 ^= wRn (uses cpu_V1 as scratch).  */
static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}
1310
/* Define gen_op_iwmmxt_<name>_M0_wRn(): M0 = helper(M0, wRn).  */
#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

/* As IWMMXT_OP, but the helper also takes cpu_env (it touches CPU
   state, e.g. saturation/condition flags).  */
#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

/* Instantiate the byte/word/long variants of an env op in one go.  */
#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

/* Unary env op on the accumulator: M0 = helper(env, M0).  */
#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}
1335
/* Multiply/accumulate and sum-of-absolute-differences ops.  */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Two-operand unpack (interleave) ops, all element sizes.  */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Unary unpack-and-extend ops (unsigned and signed variants).  */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Element-wise compares.  */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Element-wise min/max.  */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Add/subtract: no-saturate, unsigned-saturate and signed-saturate.  */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Averages (the 0/1 suffix selects the rounding variant).  */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP(msadb)

/* Pack with saturation (unsigned and signed).  */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1392
/* M0 = muladdsl(M0, T0, T1) (TMIA-family multiply-accumulate).  */
static inline void gen_op_iwmmxt_muladdsl_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}

/* M0 = muladdsw(M0, T0, T1).  */
static inline void gen_op_iwmmxt_muladdsw_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}

/* M0 = muladdswl(M0, T0, T1).  */
static inline void gen_op_iwmmxt_muladdswl_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}
1407
/* WALIGN: M0 = align(M0, wRn, T0), with T0 holding the byte shift.  */
static inline void gen_op_iwmmxt_align_M0_T0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, cpu_T[0]);
}
1413
1414static inline void gen_op_iwmmxt_insr_M0_T0_T1(int shift)
1415{
1416    TCGv tmp = tcg_const_i32(shift);
1417    gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1], tmp);
1418}
1419
/* TEXTRM (signed byte): T0 = sign_extend8(M0 >> shift).
   NOTE: shifts M0 in place, clobbering the accumulator.  */
static inline void gen_op_iwmmxt_extrsb_T0_M0(int shift)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    tcg_gen_ext8s_i32(cpu_T[0], cpu_T[0]);
}

/* TEXTRM (signed halfword): T0 = sign_extend16(M0 >> shift).
   Clobbers M0.  */
static inline void gen_op_iwmmxt_extrsw_T0_M0(int shift)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]);
}

/* TEXTRM (unsigned): T0 = (M0 >> shift) & mask; MASK == ~0u skips the
   AND for full-word extraction.  Clobbers M0.  */
static inline void gen_op_iwmmxt_extru_T0_M0(int shift, uint32_t mask)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    if (mask != ~0u)
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
}
1441
/* Set the MUP (data-register updated) bit in wCon.  */
static void gen_op_iwmmxt_set_mup(void)
{
    TCGv tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

/* Set the CUP (control-register updated) bit in wCon.  */
static void gen_op_iwmmxt_set_cup(void)
{
    TCGv tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}
1457
/* Compute the N/Z SIMD flags from M0 and store them into wCASF.  */
static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv tmp = new_tmp();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}
1464
/* M0 += low 32 bits of wRn, zero-extended (used by WSAD accumulate).  */
static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}
1471
1472
/* Split 64-bit wRn into T0 (low half) and T1 (high half).
   Uses cpu_V0 as scratch, shifting it in place.  */
static void gen_iwmmxt_movl_T0_T1_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V0, rn);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_V0);
    tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], cpu_V0);
}

/* Combine T0 (low half) and T1 (high half) into 64-bit wRn.  */
static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
{
    tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]);
    iwmmxt_store_reg(cpu_V0, rn);
}
1486
/* Compute the effective address of an iwMMXt load/store into T1, applying
   base-register writeback as encoded.  Returns nonzero for an invalid
   addressing-mode encoding (offset addressing without the U bit).  */
static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
{
    int rd;
    uint32_t offset;

    rd = (insn >> 16) & 0xf;
    gen_movl_T1_reg(s, rd);

    /* 8-bit immediate, scaled by 4 when bit 8 is set (word/double ops).  */
    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            gen_op_addl_T1_im(offset);
        else
            gen_op_addl_T1_im(-offset);

        /* Bit 21: write the updated address back to the base register.  */
        if (insn & (1 << 21))
            gen_movl_reg_T1(s, rd);
    } else if (insn & (1 << 21)) {
        /* Post indexed: T1 keeps the original base; only the base
           register is updated with base +/- offset.  */
        if (insn & (1 << 23))
            gen_op_movl_T0_im(offset);
        else
            gen_op_movl_T0_im(- offset);
        gen_op_addl_T0_T1();
        gen_movl_reg_T0(s, rd);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
1517
1518static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
1519{
1520    int rd = (insn >> 0) & 0xf;
1521
1522    if (insn & (1 << 8))
1523        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
1524            return 1;
1525        else
1526            gen_op_iwmmxt_movl_T0_wCx(rd);
1527    else
1528        gen_iwmmxt_movl_T0_T1_wRn(rd);
1529
1530    gen_op_movl_T1_im(mask);
1531    gen_op_andl_T0_T1();
1532    return 0;
1533}
1534
/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
1537static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
1538{
1539    int rd, wrd;
1540    int rdhi, rdlo, rd0, rd1, i;
1541    TCGv tmp;
1542
1543    if ((insn & 0x0e000e00) == 0x0c000000) {
1544        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1545            wrd = insn & 0xf;
1546            rdlo = (insn >> 12) & 0xf;
1547            rdhi = (insn >> 16) & 0xf;
1548            if (insn & ARM_CP_RW_BIT) {			/* TMRRC */
1549                gen_iwmmxt_movl_T0_T1_wRn(wrd);
1550                gen_movl_reg_T0(s, rdlo);
1551                gen_movl_reg_T1(s, rdhi);
1552            } else {					/* TMCRR */
1553                gen_movl_T0_reg(s, rdlo);
1554                gen_movl_T1_reg(s, rdhi);
1555                gen_iwmmxt_movl_wRn_T0_T1(wrd);
1556                gen_op_iwmmxt_set_mup();
1557            }
1558            return 0;
1559        }
1560
1561        wrd = (insn >> 12) & 0xf;
1562        if (gen_iwmmxt_address(s, insn))
1563            return 1;
1564        if (insn & ARM_CP_RW_BIT) {
1565            if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
1566                tmp = gen_ld32(cpu_T[1], IS_USER(s));
1567                tcg_gen_mov_i32(cpu_T[0], tmp);
1568                dead_tmp(tmp);
1569                gen_op_iwmmxt_movl_wCx_T0(wrd);
1570            } else {
1571                i = 1;
1572                if (insn & (1 << 8)) {
1573                    if (insn & (1 << 22)) {		/* WLDRD */
1574                        tcg_gen_qemu_ld64(cpu_M0, cpu_T[1], IS_USER(s));
1575                        i = 0;
1576                    } else {				/* WLDRW wRd */
1577                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
1578                    }
1579                } else {
1580                    if (insn & (1 << 22)) {		/* WLDRH */
1581                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
1582                    } else {				/* WLDRB */
1583                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
1584                    }
1585                }
1586                if (i) {
1587                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1588                    dead_tmp(tmp);
1589                }
1590                gen_op_iwmmxt_movq_wRn_M0(wrd);
1591            }
1592        } else {
1593            if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
1594                gen_op_iwmmxt_movl_T0_wCx(wrd);
1595                tmp = new_tmp();
1596                tcg_gen_mov_i32(tmp, cpu_T[0]);
1597                gen_st32(tmp, cpu_T[1], IS_USER(s));
1598            } else {
1599                gen_op_iwmmxt_movq_M0_wRn(wrd);
1600                tmp = new_tmp();
1601                if (insn & (1 << 8)) {
1602                    if (insn & (1 << 22)) {		/* WSTRD */
1603                        dead_tmp(tmp);
1604                        tcg_gen_qemu_st64(cpu_M0, cpu_T[1], IS_USER(s));
1605                    } else {				/* WSTRW wRd */
1606                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1607                        gen_st32(tmp, cpu_T[1], IS_USER(s));
1608                    }
1609                } else {
1610                    if (insn & (1 << 22)) {		/* WSTRH */
1611                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1612                        gen_st16(tmp, cpu_T[1], IS_USER(s));
1613                    } else {				/* WSTRB */
1614                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1615                        gen_st8(tmp, cpu_T[1], IS_USER(s));
1616                    }
1617                }
1618            }
1619        }
1620        return 0;
1621    }
1622
1623    if ((insn & 0x0f000000) != 0x0e000000)
1624        return 1;
1625
1626    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1627    case 0x000:						/* WOR */
1628        wrd = (insn >> 12) & 0xf;
1629        rd0 = (insn >> 0) & 0xf;
1630        rd1 = (insn >> 16) & 0xf;
1631        gen_op_iwmmxt_movq_M0_wRn(rd0);
1632        gen_op_iwmmxt_orq_M0_wRn(rd1);
1633        gen_op_iwmmxt_setpsr_nz();
1634        gen_op_iwmmxt_movq_wRn_M0(wrd);
1635        gen_op_iwmmxt_set_mup();
1636        gen_op_iwmmxt_set_cup();
1637        break;
1638    case 0x011:						/* TMCR */
1639        if (insn & 0xf)
1640            return 1;
1641        rd = (insn >> 12) & 0xf;
1642        wrd = (insn >> 16) & 0xf;
1643        switch (wrd) {
1644        case ARM_IWMMXT_wCID:
1645        case ARM_IWMMXT_wCASF:
1646            break;
1647        case ARM_IWMMXT_wCon:
1648            gen_op_iwmmxt_set_cup();
1649            /* Fall through.  */
1650        case ARM_IWMMXT_wCSSF:
1651            gen_op_iwmmxt_movl_T0_wCx(wrd);
1652            gen_movl_T1_reg(s, rd);
1653            gen_op_bicl_T0_T1();
1654            gen_op_iwmmxt_movl_wCx_T0(wrd);
1655            break;
1656        case ARM_IWMMXT_wCGR0:
1657        case ARM_IWMMXT_wCGR1:
1658        case ARM_IWMMXT_wCGR2:
1659        case ARM_IWMMXT_wCGR3:
1660            gen_op_iwmmxt_set_cup();
1661            gen_movl_reg_T0(s, rd);
1662            gen_op_iwmmxt_movl_wCx_T0(wrd);
1663            break;
1664        default:
1665            return 1;
1666        }
1667        break;
1668    case 0x100:						/* WXOR */
1669        wrd = (insn >> 12) & 0xf;
1670        rd0 = (insn >> 0) & 0xf;
1671        rd1 = (insn >> 16) & 0xf;
1672        gen_op_iwmmxt_movq_M0_wRn(rd0);
1673        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1674        gen_op_iwmmxt_setpsr_nz();
1675        gen_op_iwmmxt_movq_wRn_M0(wrd);
1676        gen_op_iwmmxt_set_mup();
1677        gen_op_iwmmxt_set_cup();
1678        break;
1679    case 0x111:						/* TMRC */
1680        if (insn & 0xf)
1681            return 1;
1682        rd = (insn >> 12) & 0xf;
1683        wrd = (insn >> 16) & 0xf;
1684        gen_op_iwmmxt_movl_T0_wCx(wrd);
1685        gen_movl_reg_T0(s, rd);
1686        break;
1687    case 0x300:						/* WANDN */
1688        wrd = (insn >> 12) & 0xf;
1689        rd0 = (insn >> 0) & 0xf;
1690        rd1 = (insn >> 16) & 0xf;
1691        gen_op_iwmmxt_movq_M0_wRn(rd0);
1692        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1693        gen_op_iwmmxt_andq_M0_wRn(rd1);
1694        gen_op_iwmmxt_setpsr_nz();
1695        gen_op_iwmmxt_movq_wRn_M0(wrd);
1696        gen_op_iwmmxt_set_mup();
1697        gen_op_iwmmxt_set_cup();
1698        break;
1699    case 0x200:						/* WAND */
1700        wrd = (insn >> 12) & 0xf;
1701        rd0 = (insn >> 0) & 0xf;
1702        rd1 = (insn >> 16) & 0xf;
1703        gen_op_iwmmxt_movq_M0_wRn(rd0);
1704        gen_op_iwmmxt_andq_M0_wRn(rd1);
1705        gen_op_iwmmxt_setpsr_nz();
1706        gen_op_iwmmxt_movq_wRn_M0(wrd);
1707        gen_op_iwmmxt_set_mup();
1708        gen_op_iwmmxt_set_cup();
1709        break;
1710    case 0x810: case 0xa10:				/* WMADD */
1711        wrd = (insn >> 12) & 0xf;
1712        rd0 = (insn >> 0) & 0xf;
1713        rd1 = (insn >> 16) & 0xf;
1714        gen_op_iwmmxt_movq_M0_wRn(rd0);
1715        if (insn & (1 << 21))
1716            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1717        else
1718            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1719        gen_op_iwmmxt_movq_wRn_M0(wrd);
1720        gen_op_iwmmxt_set_mup();
1721        break;
1722    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:	/* WUNPCKIL */
1723        wrd = (insn >> 12) & 0xf;
1724        rd0 = (insn >> 16) & 0xf;
1725        rd1 = (insn >> 0) & 0xf;
1726        gen_op_iwmmxt_movq_M0_wRn(rd0);
1727        switch ((insn >> 22) & 3) {
1728        case 0:
1729            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1730            break;
1731        case 1:
1732            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1733            break;
1734        case 2:
1735            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1736            break;
1737        case 3:
1738            return 1;
1739        }
1740        gen_op_iwmmxt_movq_wRn_M0(wrd);
1741        gen_op_iwmmxt_set_mup();
1742        gen_op_iwmmxt_set_cup();
1743        break;
1744    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:	/* WUNPCKIH */
1745        wrd = (insn >> 12) & 0xf;
1746        rd0 = (insn >> 16) & 0xf;
1747        rd1 = (insn >> 0) & 0xf;
1748        gen_op_iwmmxt_movq_M0_wRn(rd0);
1749        switch ((insn >> 22) & 3) {
1750        case 0:
1751            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1752            break;
1753        case 1:
1754            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1755            break;
1756        case 2:
1757            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1758            break;
1759        case 3:
1760            return 1;
1761        }
1762        gen_op_iwmmxt_movq_wRn_M0(wrd);
1763        gen_op_iwmmxt_set_mup();
1764        gen_op_iwmmxt_set_cup();
1765        break;
1766    case 0x012: case 0x112: case 0x412: case 0x512:	/* WSAD */
1767        wrd = (insn >> 12) & 0xf;
1768        rd0 = (insn >> 16) & 0xf;
1769        rd1 = (insn >> 0) & 0xf;
1770        gen_op_iwmmxt_movq_M0_wRn(rd0);
1771        if (insn & (1 << 22))
1772            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1773        else
1774            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1775        if (!(insn & (1 << 20)))
1776            gen_op_iwmmxt_addl_M0_wRn(wrd);
1777        gen_op_iwmmxt_movq_wRn_M0(wrd);
1778        gen_op_iwmmxt_set_mup();
1779        break;
1780    case 0x010: case 0x110: case 0x210: case 0x310:	/* WMUL */
1781        wrd = (insn >> 12) & 0xf;
1782        rd0 = (insn >> 16) & 0xf;
1783        rd1 = (insn >> 0) & 0xf;
1784        gen_op_iwmmxt_movq_M0_wRn(rd0);
1785        if (insn & (1 << 21)) {
1786            if (insn & (1 << 20))
1787                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1788            else
1789                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1790        } else {
1791            if (insn & (1 << 20))
1792                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1793            else
1794                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1795        }
1796        gen_op_iwmmxt_movq_wRn_M0(wrd);
1797        gen_op_iwmmxt_set_mup();
1798        break;
1799    case 0x410: case 0x510: case 0x610: case 0x710:	/* WMAC */
1800        wrd = (insn >> 12) & 0xf;
1801        rd0 = (insn >> 16) & 0xf;
1802        rd1 = (insn >> 0) & 0xf;
1803        gen_op_iwmmxt_movq_M0_wRn(rd0);
1804        if (insn & (1 << 21))
1805            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1806        else
1807            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1808        if (!(insn & (1 << 20))) {
1809            iwmmxt_load_reg(cpu_V1, wrd);
1810            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1811        }
1812        gen_op_iwmmxt_movq_wRn_M0(wrd);
1813        gen_op_iwmmxt_set_mup();
1814        break;
1815    case 0x006: case 0x406: case 0x806: case 0xc06:	/* WCMPEQ */
1816        wrd = (insn >> 12) & 0xf;
1817        rd0 = (insn >> 16) & 0xf;
1818        rd1 = (insn >> 0) & 0xf;
1819        gen_op_iwmmxt_movq_M0_wRn(rd0);
1820        switch ((insn >> 22) & 3) {
1821        case 0:
1822            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1823            break;
1824        case 1:
1825            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1826            break;
1827        case 2:
1828            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1829            break;
1830        case 3:
1831            return 1;
1832        }
1833        gen_op_iwmmxt_movq_wRn_M0(wrd);
1834        gen_op_iwmmxt_set_mup();
1835        gen_op_iwmmxt_set_cup();
1836        break;
1837    case 0x800: case 0x900: case 0xc00: case 0xd00:	/* WAVG2 */
1838        wrd = (insn >> 12) & 0xf;
1839        rd0 = (insn >> 16) & 0xf;
1840        rd1 = (insn >> 0) & 0xf;
1841        gen_op_iwmmxt_movq_M0_wRn(rd0);
1842        if (insn & (1 << 22)) {
1843            if (insn & (1 << 20))
1844                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1845            else
1846                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1847        } else {
1848            if (insn & (1 << 20))
1849                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1850            else
1851                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1852        }
1853        gen_op_iwmmxt_movq_wRn_M0(wrd);
1854        gen_op_iwmmxt_set_mup();
1855        gen_op_iwmmxt_set_cup();
1856        break;
1857    case 0x802: case 0x902: case 0xa02: case 0xb02:	/* WALIGNR */
1858        wrd = (insn >> 12) & 0xf;
1859        rd0 = (insn >> 16) & 0xf;
1860        rd1 = (insn >> 0) & 0xf;
1861        gen_op_iwmmxt_movq_M0_wRn(rd0);
1862        gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1863        gen_op_movl_T1_im(7);
1864        gen_op_andl_T0_T1();
1865        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
1866        gen_op_iwmmxt_movq_wRn_M0(wrd);
1867        gen_op_iwmmxt_set_mup();
1868        break;
1869    case 0x601: case 0x605: case 0x609: case 0x60d:	/* TINSR */
1870        rd = (insn >> 12) & 0xf;
1871        wrd = (insn >> 16) & 0xf;
1872        gen_movl_T0_reg(s, rd);
1873        gen_op_iwmmxt_movq_M0_wRn(wrd);
1874        switch ((insn >> 6) & 3) {
1875        case 0:
1876            gen_op_movl_T1_im(0xff);
1877            gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
1878            break;
1879        case 1:
1880            gen_op_movl_T1_im(0xffff);
1881            gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
1882            break;
1883        case 2:
1884            gen_op_movl_T1_im(0xffffffff);
1885            gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
1886            break;
1887        case 3:
1888            return 1;
1889        }
1890        gen_op_iwmmxt_movq_wRn_M0(wrd);
1891        gen_op_iwmmxt_set_mup();
1892        break;
1893    case 0x107: case 0x507: case 0x907: case 0xd07:	/* TEXTRM */
1894        rd = (insn >> 12) & 0xf;
1895        wrd = (insn >> 16) & 0xf;
1896        if (rd == 15)
1897            return 1;
1898        gen_op_iwmmxt_movq_M0_wRn(wrd);
1899        switch ((insn >> 22) & 3) {
1900        case 0:
1901            if (insn & 8)
1902                gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
1903            else {
1904                gen_op_iwmmxt_extru_T0_M0((insn & 7) << 3, 0xff);
1905            }
1906            break;
1907        case 1:
1908            if (insn & 8)
1909                gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
1910            else {
1911                gen_op_iwmmxt_extru_T0_M0((insn & 3) << 4, 0xffff);
1912            }
1913            break;
1914        case 2:
1915            gen_op_iwmmxt_extru_T0_M0((insn & 1) << 5, ~0u);
1916            break;
1917        case 3:
1918            return 1;
1919        }
1920        gen_movl_reg_T0(s, rd);
1921        break;
1922    case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
1923        if ((insn & 0x000ff008) != 0x0003f000)
1924            return 1;
1925        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1926        switch ((insn >> 22) & 3) {
1927        case 0:
1928            gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
1929            break;
1930        case 1:
1931            gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
1932            break;
1933        case 2:
1934            gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
1935            break;
1936        case 3:
1937            return 1;
1938        }
1939        gen_op_shll_T1_im(28);
1940        gen_set_nzcv(cpu_T[1]);
1941        break;
1942    case 0x401: case 0x405: case 0x409: case 0x40d:	/* TBCST */
1943        rd = (insn >> 12) & 0xf;
1944        wrd = (insn >> 16) & 0xf;
1945        gen_movl_T0_reg(s, rd);
1946        switch ((insn >> 6) & 3) {
1947        case 0:
1948            gen_helper_iwmmxt_bcstb(cpu_M0, cpu_T[0]);
1949            break;
1950        case 1:
1951            gen_helper_iwmmxt_bcstw(cpu_M0, cpu_T[0]);
1952            break;
1953        case 2:
1954            gen_helper_iwmmxt_bcstl(cpu_M0, cpu_T[0]);
1955            break;
1956        case 3:
1957            return 1;
1958        }
1959        gen_op_iwmmxt_movq_wRn_M0(wrd);
1960        gen_op_iwmmxt_set_mup();
1961        break;
1962    case 0x113: case 0x513: case 0x913: case 0xd13:	/* TANDC */
1963        if ((insn & 0x000ff00f) != 0x0003f000)
1964            return 1;
1965        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1966        switch ((insn >> 22) & 3) {
1967        case 0:
1968            for (i = 0; i < 7; i ++) {
1969                gen_op_shll_T1_im(4);
1970                gen_op_andl_T0_T1();
1971            }
1972            break;
1973        case 1:
1974            for (i = 0; i < 3; i ++) {
1975                gen_op_shll_T1_im(8);
1976                gen_op_andl_T0_T1();
1977            }
1978            break;
1979        case 2:
1980            gen_op_shll_T1_im(16);
1981            gen_op_andl_T0_T1();
1982            break;
1983        case 3:
1984            return 1;
1985        }
1986        gen_set_nzcv(cpu_T[0]);
1987        break;
1988    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:	/* WACC */
1989        wrd = (insn >> 12) & 0xf;
1990        rd0 = (insn >> 16) & 0xf;
1991        gen_op_iwmmxt_movq_M0_wRn(rd0);
1992        switch ((insn >> 22) & 3) {
1993        case 0:
1994            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1995            break;
1996        case 1:
1997            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1998            break;
1999        case 2:
2000            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2001            break;
2002        case 3:
2003            return 1;
2004        }
2005        gen_op_iwmmxt_movq_wRn_M0(wrd);
2006        gen_op_iwmmxt_set_mup();
2007        break;
2008    case 0x115: case 0x515: case 0x915: case 0xd15:	/* TORC */
2009        if ((insn & 0x000ff00f) != 0x0003f000)
2010            return 1;
2011        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
2012        switch ((insn >> 22) & 3) {
2013        case 0:
2014            for (i = 0; i < 7; i ++) {
2015                gen_op_shll_T1_im(4);
2016                gen_op_orl_T0_T1();
2017            }
2018            break;
2019        case 1:
2020            for (i = 0; i < 3; i ++) {
2021                gen_op_shll_T1_im(8);
2022                gen_op_orl_T0_T1();
2023            }
2024            break;
2025        case 2:
2026            gen_op_shll_T1_im(16);
2027            gen_op_orl_T0_T1();
2028            break;
2029        case 3:
2030            return 1;
2031        }
2032        gen_set_nzcv(cpu_T[0]);
2033        break;
2034    case 0x103: case 0x503: case 0x903: case 0xd03:	/* TMOVMSK */
2035        rd = (insn >> 12) & 0xf;
2036        rd0 = (insn >> 16) & 0xf;
2037        if ((insn & 0xf) != 0)
2038            return 1;
2039        gen_op_iwmmxt_movq_M0_wRn(rd0);
2040        switch ((insn >> 22) & 3) {
2041        case 0:
2042            gen_helper_iwmmxt_msbb(cpu_T[0], cpu_M0);
2043            break;
2044        case 1:
2045            gen_helper_iwmmxt_msbw(cpu_T[0], cpu_M0);
2046            break;
2047        case 2:
2048            gen_helper_iwmmxt_msbl(cpu_T[0], cpu_M0);
2049            break;
2050        case 3:
2051            return 1;
2052        }
2053        gen_movl_reg_T0(s, rd);
2054        break;
2055    case 0x106: case 0x306: case 0x506: case 0x706:	/* WCMPGT */
2056    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2057        wrd = (insn >> 12) & 0xf;
2058        rd0 = (insn >> 16) & 0xf;
2059        rd1 = (insn >> 0) & 0xf;
2060        gen_op_iwmmxt_movq_M0_wRn(rd0);
2061        switch ((insn >> 22) & 3) {
2062        case 0:
2063            if (insn & (1 << 21))
2064                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2065            else
2066                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2067            break;
2068        case 1:
2069            if (insn & (1 << 21))
2070                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2071            else
2072                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2073            break;
2074        case 2:
2075            if (insn & (1 << 21))
2076                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2077            else
2078                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2079            break;
2080        case 3:
2081            return 1;
2082        }
2083        gen_op_iwmmxt_movq_wRn_M0(wrd);
2084        gen_op_iwmmxt_set_mup();
2085        gen_op_iwmmxt_set_cup();
2086        break;
2087    case 0x00e: case 0x20e: case 0x40e: case 0x60e:	/* WUNPCKEL */
2088    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2089        wrd = (insn >> 12) & 0xf;
2090        rd0 = (insn >> 16) & 0xf;
2091        gen_op_iwmmxt_movq_M0_wRn(rd0);
2092        switch ((insn >> 22) & 3) {
2093        case 0:
2094            if (insn & (1 << 21))
2095                gen_op_iwmmxt_unpacklsb_M0();
2096            else
2097                gen_op_iwmmxt_unpacklub_M0();
2098            break;
2099        case 1:
2100            if (insn & (1 << 21))
2101                gen_op_iwmmxt_unpacklsw_M0();
2102            else
2103                gen_op_iwmmxt_unpackluw_M0();
2104            break;
2105        case 2:
2106            if (insn & (1 << 21))
2107                gen_op_iwmmxt_unpacklsl_M0();
2108            else
2109                gen_op_iwmmxt_unpacklul_M0();
2110            break;
2111        case 3:
2112            return 1;
2113        }
2114        gen_op_iwmmxt_movq_wRn_M0(wrd);
2115        gen_op_iwmmxt_set_mup();
2116        gen_op_iwmmxt_set_cup();
2117        break;
2118    case 0x00c: case 0x20c: case 0x40c: case 0x60c:	/* WUNPCKEH */
2119    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2120        wrd = (insn >> 12) & 0xf;
2121        rd0 = (insn >> 16) & 0xf;
2122        gen_op_iwmmxt_movq_M0_wRn(rd0);
2123        switch ((insn >> 22) & 3) {
2124        case 0:
2125            if (insn & (1 << 21))
2126                gen_op_iwmmxt_unpackhsb_M0();
2127            else
2128                gen_op_iwmmxt_unpackhub_M0();
2129            break;
2130        case 1:
2131            if (insn & (1 << 21))
2132                gen_op_iwmmxt_unpackhsw_M0();
2133            else
2134                gen_op_iwmmxt_unpackhuw_M0();
2135            break;
2136        case 2:
2137            if (insn & (1 << 21))
2138                gen_op_iwmmxt_unpackhsl_M0();
2139            else
2140                gen_op_iwmmxt_unpackhul_M0();
2141            break;
2142        case 3:
2143            return 1;
2144        }
2145        gen_op_iwmmxt_movq_wRn_M0(wrd);
2146        gen_op_iwmmxt_set_mup();
2147        gen_op_iwmmxt_set_cup();
2148        break;
2149    case 0x204: case 0x604: case 0xa04: case 0xe04:	/* WSRL */
2150    case 0x214: case 0x614: case 0xa14: case 0xe14:
2151        wrd = (insn >> 12) & 0xf;
2152        rd0 = (insn >> 16) & 0xf;
2153        gen_op_iwmmxt_movq_M0_wRn(rd0);
2154        if (gen_iwmmxt_shift(insn, 0xff))
2155            return 1;
2156        switch ((insn >> 22) & 3) {
2157        case 0:
2158            return 1;
2159        case 1:
2160            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2161            break;
2162        case 2:
2163            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2164            break;
2165        case 3:
2166            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2167            break;
2168        }
2169        gen_op_iwmmxt_movq_wRn_M0(wrd);
2170        gen_op_iwmmxt_set_mup();
2171        gen_op_iwmmxt_set_cup();
2172        break;
2173    case 0x004: case 0x404: case 0x804: case 0xc04:	/* WSRA */
2174    case 0x014: case 0x414: case 0x814: case 0xc14:
2175        wrd = (insn >> 12) & 0xf;
2176        rd0 = (insn >> 16) & 0xf;
2177        gen_op_iwmmxt_movq_M0_wRn(rd0);
2178        if (gen_iwmmxt_shift(insn, 0xff))
2179            return 1;
2180        switch ((insn >> 22) & 3) {
2181        case 0:
2182            return 1;
2183        case 1:
2184            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2185            break;
2186        case 2:
2187            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2188            break;
2189        case 3:
2190            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2191            break;
2192        }
2193        gen_op_iwmmxt_movq_wRn_M0(wrd);
2194        gen_op_iwmmxt_set_mup();
2195        gen_op_iwmmxt_set_cup();
2196        break;
2197    case 0x104: case 0x504: case 0x904: case 0xd04:	/* WSLL */
2198    case 0x114: case 0x514: case 0x914: case 0xd14:
2199        wrd = (insn >> 12) & 0xf;
2200        rd0 = (insn >> 16) & 0xf;
2201        gen_op_iwmmxt_movq_M0_wRn(rd0);
2202        if (gen_iwmmxt_shift(insn, 0xff))
2203            return 1;
2204        switch ((insn >> 22) & 3) {
2205        case 0:
2206            return 1;
2207        case 1:
2208            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2209            break;
2210        case 2:
2211            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2212            break;
2213        case 3:
2214            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2215            break;
2216        }
2217        gen_op_iwmmxt_movq_wRn_M0(wrd);
2218        gen_op_iwmmxt_set_mup();
2219        gen_op_iwmmxt_set_cup();
2220        break;
2221    case 0x304: case 0x704: case 0xb04: case 0xf04:	/* WROR */
2222    case 0x314: case 0x714: case 0xb14: case 0xf14:
2223        wrd = (insn >> 12) & 0xf;
2224        rd0 = (insn >> 16) & 0xf;
2225        gen_op_iwmmxt_movq_M0_wRn(rd0);
2226        switch ((insn >> 22) & 3) {
2227        case 0:
2228            return 1;
2229        case 1:
2230            if (gen_iwmmxt_shift(insn, 0xf))
2231                return 1;
2232            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2233            break;
2234        case 2:
2235            if (gen_iwmmxt_shift(insn, 0x1f))
2236                return 1;
2237            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2238            break;
2239        case 3:
2240            if (gen_iwmmxt_shift(insn, 0x3f))
2241                return 1;
2242            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2243            break;
2244        }
2245        gen_op_iwmmxt_movq_wRn_M0(wrd);
2246        gen_op_iwmmxt_set_mup();
2247        gen_op_iwmmxt_set_cup();
2248        break;
2249    case 0x116: case 0x316: case 0x516: case 0x716:	/* WMIN */
2250    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2251        wrd = (insn >> 12) & 0xf;
2252        rd0 = (insn >> 16) & 0xf;
2253        rd1 = (insn >> 0) & 0xf;
2254        gen_op_iwmmxt_movq_M0_wRn(rd0);
2255        switch ((insn >> 22) & 3) {
2256        case 0:
2257            if (insn & (1 << 21))
2258                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2259            else
2260                gen_op_iwmmxt_minub_M0_wRn(rd1);
2261            break;
2262        case 1:
2263            if (insn & (1 << 21))
2264                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2265            else
2266                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2267            break;
2268        case 2:
2269            if (insn & (1 << 21))
2270                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2271            else
2272                gen_op_iwmmxt_minul_M0_wRn(rd1);
2273            break;
2274        case 3:
2275            return 1;
2276        }
2277        gen_op_iwmmxt_movq_wRn_M0(wrd);
2278        gen_op_iwmmxt_set_mup();
2279        break;
2280    case 0x016: case 0x216: case 0x416: case 0x616:	/* WMAX */
2281    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2282        wrd = (insn >> 12) & 0xf;
2283        rd0 = (insn >> 16) & 0xf;
2284        rd1 = (insn >> 0) & 0xf;
2285        gen_op_iwmmxt_movq_M0_wRn(rd0);
2286        switch ((insn >> 22) & 3) {
2287        case 0:
2288            if (insn & (1 << 21))
2289                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2290            else
2291                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2292            break;
2293        case 1:
2294            if (insn & (1 << 21))
2295                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2296            else
2297                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2298            break;
2299        case 2:
2300            if (insn & (1 << 21))
2301                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2302            else
2303                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2304            break;
2305        case 3:
2306            return 1;
2307        }
2308        gen_op_iwmmxt_movq_wRn_M0(wrd);
2309        gen_op_iwmmxt_set_mup();
2310        break;
2311    case 0x002: case 0x102: case 0x202: case 0x302:	/* WALIGNI */
2312    case 0x402: case 0x502: case 0x602: case 0x702:
2313        wrd = (insn >> 12) & 0xf;
2314        rd0 = (insn >> 16) & 0xf;
2315        rd1 = (insn >> 0) & 0xf;
2316        gen_op_iwmmxt_movq_M0_wRn(rd0);
2317        gen_op_movl_T0_im((insn >> 20) & 3);
2318        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
2319        gen_op_iwmmxt_movq_wRn_M0(wrd);
2320        gen_op_iwmmxt_set_mup();
2321        break;
2322    case 0x01a: case 0x11a: case 0x21a: case 0x31a:	/* WSUB */
2323    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2324    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2325    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2326        wrd = (insn >> 12) & 0xf;
2327        rd0 = (insn >> 16) & 0xf;
2328        rd1 = (insn >> 0) & 0xf;
2329        gen_op_iwmmxt_movq_M0_wRn(rd0);
2330        switch ((insn >> 20) & 0xf) {
2331        case 0x0:
2332            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2333            break;
2334        case 0x1:
2335            gen_op_iwmmxt_subub_M0_wRn(rd1);
2336            break;
2337        case 0x3:
2338            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2339            break;
2340        case 0x4:
2341            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2342            break;
2343        case 0x5:
2344            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2345            break;
2346        case 0x7:
2347            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2348            break;
2349        case 0x8:
2350            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2351            break;
2352        case 0x9:
2353            gen_op_iwmmxt_subul_M0_wRn(rd1);
2354            break;
2355        case 0xb:
2356            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2357            break;
2358        default:
2359            return 1;
2360        }
2361        gen_op_iwmmxt_movq_wRn_M0(wrd);
2362        gen_op_iwmmxt_set_mup();
2363        gen_op_iwmmxt_set_cup();
2364        break;
2365    case 0x01e: case 0x11e: case 0x21e: case 0x31e:	/* WSHUFH */
2366    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2367    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2368    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2369        wrd = (insn >> 12) & 0xf;
2370        rd0 = (insn >> 16) & 0xf;
2371        gen_op_iwmmxt_movq_M0_wRn(rd0);
2372        gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
2373        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2374        gen_op_iwmmxt_movq_wRn_M0(wrd);
2375        gen_op_iwmmxt_set_mup();
2376        gen_op_iwmmxt_set_cup();
2377        break;
2378    case 0x018: case 0x118: case 0x218: case 0x318:	/* WADD */
2379    case 0x418: case 0x518: case 0x618: case 0x718:
2380    case 0x818: case 0x918: case 0xa18: case 0xb18:
2381    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2382        wrd = (insn >> 12) & 0xf;
2383        rd0 = (insn >> 16) & 0xf;
2384        rd1 = (insn >> 0) & 0xf;
2385        gen_op_iwmmxt_movq_M0_wRn(rd0);
2386        switch ((insn >> 20) & 0xf) {
2387        case 0x0:
2388            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2389            break;
2390        case 0x1:
2391            gen_op_iwmmxt_addub_M0_wRn(rd1);
2392            break;
2393        case 0x3:
2394            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2395            break;
2396        case 0x4:
2397            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2398            break;
2399        case 0x5:
2400            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2401            break;
2402        case 0x7:
2403            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2404            break;
2405        case 0x8:
2406            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2407            break;
2408        case 0x9:
2409            gen_op_iwmmxt_addul_M0_wRn(rd1);
2410            break;
2411        case 0xb:
2412            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2413            break;
2414        default:
2415            return 1;
2416        }
2417        gen_op_iwmmxt_movq_wRn_M0(wrd);
2418        gen_op_iwmmxt_set_mup();
2419        gen_op_iwmmxt_set_cup();
2420        break;
2421    case 0x008: case 0x108: case 0x208: case 0x308:	/* WPACK */
2422    case 0x408: case 0x508: case 0x608: case 0x708:
2423    case 0x808: case 0x908: case 0xa08: case 0xb08:
2424    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2425        wrd = (insn >> 12) & 0xf;
2426        rd0 = (insn >> 16) & 0xf;
2427        rd1 = (insn >> 0) & 0xf;
2428        gen_op_iwmmxt_movq_M0_wRn(rd0);
2429        if (!(insn & (1 << 20)))
2430            return 1;
2431        switch ((insn >> 22) & 3) {
2432        case 0:
2433            return 1;
2434        case 1:
2435            if (insn & (1 << 21))
2436                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2437            else
2438                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2439            break;
2440        case 2:
2441            if (insn & (1 << 21))
2442                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2443            else
2444                gen_op_iwmmxt_packul_M0_wRn(rd1);
2445            break;
2446        case 3:
2447            if (insn & (1 << 21))
2448                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2449            else
2450                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2451            break;
2452        }
2453        gen_op_iwmmxt_movq_wRn_M0(wrd);
2454        gen_op_iwmmxt_set_mup();
2455        gen_op_iwmmxt_set_cup();
2456        break;
2457    case 0x201: case 0x203: case 0x205: case 0x207:
2458    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2459    case 0x211: case 0x213: case 0x215: case 0x217:
2460    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2461        wrd = (insn >> 5) & 0xf;
2462        rd0 = (insn >> 12) & 0xf;
2463        rd1 = (insn >> 0) & 0xf;
2464        if (rd0 == 0xf || rd1 == 0xf)
2465            return 1;
2466        gen_op_iwmmxt_movq_M0_wRn(wrd);
2467        switch ((insn >> 16) & 0xf) {
2468        case 0x0:					/* TMIA */
2469            gen_movl_T0_reg(s, rd0);
2470            gen_movl_T1_reg(s, rd1);
2471            gen_op_iwmmxt_muladdsl_M0_T0_T1();
2472            break;
2473        case 0x8:					/* TMIAPH */
2474            gen_movl_T0_reg(s, rd0);
2475            gen_movl_T1_reg(s, rd1);
2476            gen_op_iwmmxt_muladdsw_M0_T0_T1();
2477            break;
2478        case 0xc: case 0xd: case 0xe: case 0xf:		/* TMIAxy */
2479            gen_movl_T1_reg(s, rd0);
2480            if (insn & (1 << 16))
2481                gen_op_shrl_T1_im(16);
2482            gen_op_movl_T0_T1();
2483            gen_movl_T1_reg(s, rd1);
2484            if (insn & (1 << 17))
2485                gen_op_shrl_T1_im(16);
2486            gen_op_iwmmxt_muladdswl_M0_T0_T1();
2487            break;
2488        default:
2489            return 1;
2490        }
2491        gen_op_iwmmxt_movq_wRn_M0(wrd);
2492        gen_op_iwmmxt_set_mup();
2493        break;
2494    default:
2495        return 1;
2496    }
2497
2498    return 0;
2499}
2500
/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    int acc, rd0, rd1, rdhi, rdlo;

    if ((insn & 0x0ff00f10) == 0x0e200010) {
        /* Multiply with Internal Accumulate Format */
        rd0 = (insn >> 12) & 0xf;
        rd1 = insn & 0xf;
        acc = (insn >> 5) & 7;

        /* Only accumulator acc0 is handled here; any other accumulator
           number is treated as an undefined instruction.  */
        if (acc != 0)
            return 1;

        switch ((insn >> 16) & 0xf) {
        case 0x0:					/* MIA */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsl_M0_T0_T1();
            break;
        case 0x8:					/* MIAPH */
            gen_movl_T0_reg(s, rd0);
            gen_movl_T1_reg(s, rd1);
            gen_op_iwmmxt_muladdsw_M0_T0_T1();
            break;
        case 0xc:					/* MIABB */
        case 0xd:					/* MIABT */
        case 0xe:					/* MIATB */
        case 0xf:					/* MIATT */
            /* Pick the bottom (no shift) or top (shift by 16) halfword of
               each source register, then multiply-accumulate the pair.
               Bit 16 selects the half of rd0, bit 17 the half of rd1.  */
            gen_movl_T1_reg(s, rd0);
            if (insn & (1 << 16))
                gen_op_shrl_T1_im(16);
            gen_op_movl_T0_T1();
            gen_movl_T1_reg(s, rd1);
            if (insn & (1 << 17))
                gen_op_shrl_T1_im(16);
            gen_op_iwmmxt_muladdswl_M0_T0_T1();
            break;
        default:
            return 1;
        }

        gen_op_iwmmxt_movq_wRn_M0(acc);
        return 0;
    }

    if ((insn & 0x0fe00ff8) == 0x0c400000) {
        /* Internal Accumulator Access Format */
        rdhi = (insn >> 16) & 0xf;
        rdlo = (insn >> 12) & 0xf;
        acc = insn & 7;

        /* As above, only acc0 is handled.  */
        if (acc != 0)
            return 1;

        if (insn & ARM_CP_RW_BIT) {			/* MRA */
            /* Low 32 bits of the accumulator go to rdlo; the high word is
               masked with (1 << (40 - 32)) - 1 so rdhi receives only the
               top 8 bits of the 40-bit accumulator.  */
            gen_iwmmxt_movl_T0_T1_wRn(acc);
            gen_movl_reg_T0(s, rdlo);
            gen_op_movl_T0_im((1 << (40 - 32)) - 1);
            gen_op_andl_T0_T1();
            gen_movl_reg_T0(s, rdhi);
        } else {					/* MAR */
            gen_movl_T0_reg(s, rdlo);
            gen_movl_T1_reg(s, rdhi);
            gen_iwmmxt_movl_wRn_T0_T1(acc);
        }
        return 0;
    }

    return 1;
}
2573
2574/* Disassemble system coprocessor instruction.  Return nonzero if
2575   instruction is not defined.  */
2576static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
2577{
2578    TCGv tmp;
2579    uint32_t rd = (insn >> 12) & 0xf;
2580    uint32_t cp = (insn >> 8) & 0xf;
2581    if (IS_USER(s)) {
2582        return 1;
2583    }
2584
2585    if (insn & ARM_CP_RW_BIT) {
2586        if (!env->cp[cp].cp_read)
2587            return 1;
2588        gen_set_pc_im(s->pc);
2589        tmp = new_tmp();
2590        gen_helper_get_cp(tmp, cpu_env, tcg_const_i32(insn));
2591        store_reg(s, rd, tmp);
2592    } else {
2593        if (!env->cp[cp].cp_write)
2594            return 1;
2595        gen_set_pc_im(s->pc);
2596        tmp = load_reg(s, rd);
2597        gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp);
2598        dead_tmp(tmp);
2599    }
2600    return 0;
2601}
2602
2603static int cp15_user_ok(uint32_t insn)
2604{
2605    int cpn = (insn >> 16) & 0xf;
2606    int cpm = insn & 0xf;
2607    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2608
2609    if (cpn == 13 && cpm == 0) {
2610        /* TLS register.  */
2611        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
2612            return 1;
2613    }
2614    if (cpn == 7) {
2615        /* ISB, DSB, DMB.  */
2616        if ((cpm == 5 && op == 4)
2617                || (cpm == 10 && (op == 4 || op == 5)))
2618            return 1;
2619    }
2620    return 0;
2621}
2622
2623/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
2624   instruction is not defined.  */
static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
    uint32_t rd;
    TCGv tmp;

    /* M profile cores use memory mapped registers instead of cp15.  */
    if (arm_feature(env, ARM_FEATURE_M))
	return 1;

    /* Bit 25 clear means a two-register transfer (mcrr/mrrc).  */
    if ((insn & (1 << 25)) == 0) {
        if (insn & (1 << 20)) {
            /* mrrc */
            return 1;
        }
        /* mcrr.  Used for block cache operations, so implement as no-op.  */
        return 0;
    }
    if ((insn & (1 << 4)) == 0) {
        /* cdp */
        return 1;
    }
    /* User mode may only touch the few registers whitelisted by
       cp15_user_ok (TLS reads, barrier operations).  */
    if (IS_USER(s) && !cp15_user_ok(insn)) {
        return 1;
    }
    if ((insn & 0x0fff0fff) == 0x0e070f90
        || (insn & 0x0fff0fff) == 0x0e070f58) {
        /* Wait for interrupt.  Sync the PC and let the main loop halt.  */
        gen_set_pc_im(s->pc);
        s->is_jmp = DISAS_WFI;
        return 0;
    }
    rd = (insn >> 12) & 0xf;
    if (insn & ARM_CP_RW_BIT) {
        /* mrc: read a cp15 register via the helper.  */
        tmp = new_tmp();
        gen_helper_get_cp15(tmp, cpu_env, tcg_const_i32(insn));
        /* If the destination register is r15 then sets condition codes.  */
        /* NOTE(review): for rd == 15 the value read here is simply
           discarded; the flag update presumably happens inside the
           get_cp15 helper -- confirm.  */
        if (rd != 15)
            store_reg(s, rd, tmp);
        else
            dead_tmp(tmp);
    } else {
        /* mcr: write a cp15 register via the helper.  */
        tmp = load_reg(s, rd);
        gen_helper_set_cp15(cpu_env, tcg_const_i32(insn), tmp);
        dead_tmp(tmp);
        /* Normally we would always end the TB here, but Linux
         * arch/arm/mach-pxa/sleep.S expects two instructions following
         * an MMU enable to execute from cache.  Imitate this behaviour.  */
        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
                (insn & 0x0fff0fff) != 0x0e010f10)
            gen_lookup_tb(s);
    }
    return 0;
}
2678
/* VFP register numbers are split across a 4-bit "big" field and a 1-bit
   "small" field in the instruction.  For single-precision registers the
   small bit is the least significant bit of the register number; for
   double-precision registers it is the most significant bit.  */

/* Shift right by n, or left when n is negative (needed when the big
   field starts at bit 0 and bigbit - 1 is -1).  */
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
/* Extract a single-precision register number: the 4 big bits land in
   bits [4:1] and the small bit becomes bit 0.  */
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
/* Extract a double-precision register number: on VFP3 the small bit
   becomes bit 4 of the register number; earlier cores only have 16 D
   registers, so a set small bit makes the enclosing function return 1
   (undefined instruction).  */
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if (insn & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

/* Destination (D), first operand (N) and second operand (M) register
   fields of a VFP instruction.  */
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2698
2699/* Move between integer and VFP cores.  */
2700static TCGv gen_vfp_mrs(void)
2701{
2702    TCGv tmp = new_tmp();
2703    tcg_gen_mov_i32(tmp, cpu_F0s);
2704    return tmp;
2705}
2706
2707static void gen_vfp_msr(TCGv tmp)
2708{
2709    tcg_gen_mov_i32(cpu_F0s, tmp);
2710    dead_tmp(tmp);
2711}
2712
2713static inline int
2714vfp_enabled(CPUState * env)
2715{
2716    return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
2717}
2718
2719static void gen_neon_dup_u8(TCGv var, int shift)
2720{
2721    TCGv tmp = new_tmp();
2722    if (shift)
2723        tcg_gen_shri_i32(var, var, shift);
2724    tcg_gen_ext8u_i32(var, var);
2725    tcg_gen_shli_i32(tmp, var, 8);
2726    tcg_gen_or_i32(var, var, tmp);
2727    tcg_gen_shli_i32(tmp, var, 16);
2728    tcg_gen_or_i32(var, var, tmp);
2729    dead_tmp(tmp);
2730}
2731
2732static void gen_neon_dup_low16(TCGv var)
2733{
2734    TCGv tmp = new_tmp();
2735    tcg_gen_ext16u_i32(var, var);
2736    tcg_gen_shli_i32(tmp, var, 16);
2737    tcg_gen_or_i32(var, var, tmp);
2738    dead_tmp(tmp);
2739}
2740
2741static void gen_neon_dup_high16(TCGv var)
2742{
2743    TCGv tmp = new_tmp();
2744    tcg_gen_andi_i32(var, var, 0xffff0000);
2745    tcg_gen_shri_i32(tmp, var, 16);
2746    tcg_gen_or_i32(var, var, tmp);
2747    dead_tmp(tmp);
2748}
2749
/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
2752static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
2753{
2754    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2755    int dp, veclen;
2756    TCGv tmp;
2757    TCGv tmp2;
2758
2759    if (!arm_feature(env, ARM_FEATURE_VFP))
2760        return 1;
2761
2762    if (!vfp_enabled(env)) {
2763        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
2764        if ((insn & 0x0fe00fff) != 0x0ee00a10)
2765            return 1;
2766        rn = (insn >> 16) & 0xf;
2767        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2768            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
2769            return 1;
2770    }
2771    dp = ((insn & 0xf00) == 0xb00);
2772    switch ((insn >> 24) & 0xf) {
2773    case 0xe:
2774        if (insn & (1 << 4)) {
2775            /* single register transfer */
2776            rd = (insn >> 12) & 0xf;
2777            if (dp) {
2778                int size;
2779                int pass;
2780
2781                VFP_DREG_N(rn, insn);
2782                if (insn & 0xf)
2783                    return 1;
2784                if (insn & 0x00c00060
2785                    && !arm_feature(env, ARM_FEATURE_NEON))
2786                    return 1;
2787
2788                pass = (insn >> 21) & 1;
2789                if (insn & (1 << 22)) {
2790                    size = 0;
2791                    offset = ((insn >> 5) & 3) * 8;
2792                } else if (insn & (1 << 5)) {
2793                    size = 1;
2794                    offset = (insn & (1 << 6)) ? 16 : 0;
2795                } else {
2796                    size = 2;
2797                    offset = 0;
2798                }
2799                if (insn & ARM_CP_RW_BIT) {
2800                    /* vfp->arm */
2801                    tmp = neon_load_reg(rn, pass);
2802                    switch (size) {
2803                    case 0:
2804                        if (offset)
2805                            tcg_gen_shri_i32(tmp, tmp, offset);
2806                        if (insn & (1 << 23))
2807                            gen_uxtb(tmp);
2808                        else
2809                            gen_sxtb(tmp);
2810                        break;
2811                    case 1:
2812                        if (insn & (1 << 23)) {
2813                            if (offset) {
2814                                tcg_gen_shri_i32(tmp, tmp, 16);
2815                            } else {
2816                                gen_uxth(tmp);
2817                            }
2818                        } else {
2819                            if (offset) {
2820                                tcg_gen_sari_i32(tmp, tmp, 16);
2821                            } else {
2822                                gen_sxth(tmp);
2823                            }
2824                        }
2825                        break;
2826                    case 2:
2827                        break;
2828                    }
2829                    store_reg(s, rd, tmp);
2830                } else {
2831                    /* arm->vfp */
2832                    tmp = load_reg(s, rd);
2833                    if (insn & (1 << 23)) {
2834                        /* VDUP */
2835                        if (size == 0) {
2836                            gen_neon_dup_u8(tmp, 0);
2837                        } else if (size == 1) {
2838                            gen_neon_dup_low16(tmp);
2839                        }
2840                        for (n = 0; n <= pass * 2; n++) {
2841                            tmp2 = new_tmp();
2842                            tcg_gen_mov_i32(tmp2, tmp);
2843                            neon_store_reg(rn, n, tmp2);
2844                        }
2845                        neon_store_reg(rn, n, tmp);
2846                    } else {
2847                        /* VMOV */
2848                        switch (size) {
2849                        case 0:
2850                            tmp2 = neon_load_reg(rn, pass);
2851                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
2852                            dead_tmp(tmp2);
2853                            break;
2854                        case 1:
2855                            tmp2 = neon_load_reg(rn, pass);
2856                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
2857                            dead_tmp(tmp2);
2858                            break;
2859                        case 2:
2860                            break;
2861                        }
2862                        neon_store_reg(rn, pass, tmp);
2863                    }
2864                }
2865            } else { /* !dp */
2866                if ((insn & 0x6f) != 0x00)
2867                    return 1;
2868                rn = VFP_SREG_N(insn);
2869                if (insn & ARM_CP_RW_BIT) {
2870                    /* vfp->arm */
2871                    if (insn & (1 << 21)) {
2872                        /* system register */
2873                        rn >>= 1;
2874
2875                        switch (rn) {
2876                        case ARM_VFP_FPSID:
2877                            /* VFP2 allows access to FSID from userspace.
2878                               VFP3 restricts all id registers to privileged
2879                               accesses.  */
2880                            if (IS_USER(s)
2881                                && arm_feature(env, ARM_FEATURE_VFP3))
2882                                return 1;
2883                            tmp = load_cpu_field(vfp.xregs[rn]);
2884                            break;
2885                        case ARM_VFP_FPEXC:
2886                            if (IS_USER(s))
2887                                return 1;
2888                            tmp = load_cpu_field(vfp.xregs[rn]);
2889                            break;
2890                        case ARM_VFP_FPINST:
2891                        case ARM_VFP_FPINST2:
2892                            /* Not present in VFP3.  */
2893                            if (IS_USER(s)
2894                                || arm_feature(env, ARM_FEATURE_VFP3))
2895                                return 1;
2896                            tmp = load_cpu_field(vfp.xregs[rn]);
2897                            break;
2898                        case ARM_VFP_FPSCR:
2899                            if (rd == 15) {
2900                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
2901                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
2902                            } else {
2903                                tmp = new_tmp();
2904                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
2905                            }
2906                            break;
2907                        case ARM_VFP_MVFR0:
2908                        case ARM_VFP_MVFR1:
2909                            if (IS_USER(s)
2910                                || !arm_feature(env, ARM_FEATURE_VFP3))
2911                                return 1;
2912                            tmp = load_cpu_field(vfp.xregs[rn]);
2913                            break;
2914                        default:
2915                            return 1;
2916                        }
2917                    } else {
2918                        gen_mov_F0_vreg(0, rn);
2919                        tmp = gen_vfp_mrs();
2920                    }
2921                    if (rd == 15) {
2922                        /* Set the 4 flag bits in the CPSR.  */
2923                        gen_set_nzcv(tmp);
2924                        dead_tmp(tmp);
2925                    } else {
2926                        store_reg(s, rd, tmp);
2927                    }
2928                } else {
2929                    /* arm->vfp */
2930                    tmp = load_reg(s, rd);
2931                    if (insn & (1 << 21)) {
2932                        rn >>= 1;
2933                        /* system register */
2934                        switch (rn) {
2935                        case ARM_VFP_FPSID:
2936                        case ARM_VFP_MVFR0:
2937                        case ARM_VFP_MVFR1:
2938                            /* Writes are ignored.  */
2939                            break;
2940                        case ARM_VFP_FPSCR:
2941                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
2942                            dead_tmp(tmp);
2943                            gen_lookup_tb(s);
2944                            break;
2945                        case ARM_VFP_FPEXC:
2946                            if (IS_USER(s))
2947                                return 1;
2948                            store_cpu_field(tmp, vfp.xregs[rn]);
2949                            gen_lookup_tb(s);
2950                            break;
2951                        case ARM_VFP_FPINST:
2952                        case ARM_VFP_FPINST2:
2953                            store_cpu_field(tmp, vfp.xregs[rn]);
2954                            break;
2955                        default:
2956                            return 1;
2957                        }
2958                    } else {
2959                        gen_vfp_msr(tmp);
2960                        gen_mov_vreg_F0(0, rn);
2961                    }
2962                }
2963            }
2964        } else {
2965            /* data processing */
2966            /* The opcode is in bits 23, 21, 20 and 6.  */
2967            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2968            if (dp) {
2969                if (op == 15) {
2970                    /* rn is opcode */
2971                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2972                } else {
2973                    /* rn is register number */
2974                    VFP_DREG_N(rn, insn);
2975                }
2976
2977                if (op == 15 && (rn == 15 || rn > 17)) {
2978                    /* Integer or single precision destination.  */
2979                    rd = VFP_SREG_D(insn);
2980                } else {
2981                    VFP_DREG_D(rd, insn);
2982                }
2983
2984                if (op == 15 && (rn == 16 || rn == 17)) {
2985                    /* Integer source.  */
2986                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
2987                } else {
2988                    VFP_DREG_M(rm, insn);
2989                }
2990            } else {
2991                rn = VFP_SREG_N(insn);
2992                if (op == 15 && rn == 15) {
2993                    /* Double precision destination.  */
2994                    VFP_DREG_D(rd, insn);
2995                } else {
2996                    rd = VFP_SREG_D(insn);
2997                }
2998                rm = VFP_SREG_M(insn);
2999            }
3000
3001            veclen = env->vfp.vec_len;
3002            if (op == 15 && rn > 3)
3003                veclen = 0;
3004
3005            /* Shut up compiler warnings.  */
3006            delta_m = 0;
3007            delta_d = 0;
3008            bank_mask = 0;
3009
3010            if (veclen > 0) {
3011                if (dp)
3012                    bank_mask = 0xc;
3013                else
3014                    bank_mask = 0x18;
3015
3016                /* Figure out what type of vector operation this is.  */
3017                if ((rd & bank_mask) == 0) {
3018                    /* scalar */
3019                    veclen = 0;
3020                } else {
3021                    if (dp)
3022                        delta_d = (env->vfp.vec_stride >> 1) + 1;
3023                    else
3024                        delta_d = env->vfp.vec_stride + 1;
3025
3026                    if ((rm & bank_mask) == 0) {
3027                        /* mixed scalar/vector */
3028                        delta_m = 0;
3029                    } else {
3030                        /* vector */
3031                        delta_m = delta_d;
3032                    }
3033                }
3034            }
3035
3036            /* Load the initial operands.  */
3037            if (op == 15) {
3038                switch (rn) {
3039                case 16:
3040                case 17:
3041                    /* Integer source */
3042                    gen_mov_F0_vreg(0, rm);
3043                    break;
3044                case 8:
3045                case 9:
3046                    /* Compare */
3047                    gen_mov_F0_vreg(dp, rd);
3048                    gen_mov_F1_vreg(dp, rm);
3049                    break;
3050                case 10:
3051                case 11:
3052                    /* Compare with zero */
3053                    gen_mov_F0_vreg(dp, rd);
3054                    gen_vfp_F1_ld0(dp);
3055                    break;
3056                case 20:
3057                case 21:
3058                case 22:
3059                case 23:
3060                case 28:
3061                case 29:
3062                case 30:
3063                case 31:
3064                    /* Source and destination the same.  */
3065                    gen_mov_F0_vreg(dp, rd);
3066                    break;
3067                default:
3068                    /* One source operand.  */
3069                    gen_mov_F0_vreg(dp, rm);
3070                    break;
3071                }
3072            } else {
3073                /* Two source operands.  */
3074                gen_mov_F0_vreg(dp, rn);
3075                gen_mov_F1_vreg(dp, rm);
3076            }
3077
3078            for (;;) {
3079                /* Perform the calculation.  */
3080                switch (op) {
3081                case 0: /* mac: fd + (fn * fm) */
3082                    gen_vfp_mul(dp);
3083                    gen_mov_F1_vreg(dp, rd);
3084                    gen_vfp_add(dp);
3085                    break;
3086                case 1: /* nmac: fd - (fn * fm) */
3087                    gen_vfp_mul(dp);
3088                    gen_vfp_neg(dp);
3089                    gen_mov_F1_vreg(dp, rd);
3090                    gen_vfp_add(dp);
3091                    break;
3092                case 2: /* msc: -fd + (fn * fm) */
3093                    gen_vfp_mul(dp);
3094                    gen_mov_F1_vreg(dp, rd);
3095                    gen_vfp_sub(dp);
3096                    break;
3097                case 3: /* nmsc: -fd - (fn * fm)  */
3098                    gen_vfp_mul(dp);
3099                    gen_vfp_neg(dp);
3100                    gen_mov_F1_vreg(dp, rd);
3101                    gen_vfp_sub(dp);
3102                    break;
3103                case 4: /* mul: fn * fm */
3104                    gen_vfp_mul(dp);
3105                    break;
3106                case 5: /* nmul: -(fn * fm) */
3107                    gen_vfp_mul(dp);
3108                    gen_vfp_neg(dp);
3109                    break;
3110                case 6: /* add: fn + fm */
3111                    gen_vfp_add(dp);
3112                    break;
3113                case 7: /* sub: fn - fm */
3114                    gen_vfp_sub(dp);
3115                    break;
3116                case 8: /* div: fn / fm */
3117                    gen_vfp_div(dp);
3118                    break;
3119                case 14: /* fconst */
3120                    if (!arm_feature(env, ARM_FEATURE_VFP3))
3121                      return 1;
3122
3123                    n = (insn << 12) & 0x80000000;
3124                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
3125                    if (dp) {
3126                        if (i & 0x40)
3127                            i |= 0x3f80;
3128                        else
3129                            i |= 0x4000;
3130                        n |= i << 16;
3131                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3132                    } else {
3133                        if (i & 0x40)
3134                            i |= 0x780;
3135                        else
3136                            i |= 0x800;
3137                        n |= i << 19;
3138                        tcg_gen_movi_i32(cpu_F0s, n);
3139                    }
3140                    break;
3141                case 15: /* extension space */
3142                    switch (rn) {
3143                    case 0: /* cpy */
3144                        /* no-op */
3145                        break;
3146                    case 1: /* abs */
3147                        gen_vfp_abs(dp);
3148                        break;
3149                    case 2: /* neg */
3150                        gen_vfp_neg(dp);
3151                        break;
3152                    case 3: /* sqrt */
3153                        gen_vfp_sqrt(dp);
3154                        break;
3155                    case 8: /* cmp */
3156                        gen_vfp_cmp(dp);
3157                        break;
3158                    case 9: /* cmpe */
3159                        gen_vfp_cmpe(dp);
3160                        break;
3161                    case 10: /* cmpz */
3162                        gen_vfp_cmp(dp);
3163                        break;
3164                    case 11: /* cmpez */
3165                        gen_vfp_F1_ld0(dp);
3166                        gen_vfp_cmpe(dp);
3167                        break;
3168                    case 15: /* single<->double conversion */
3169                        if (dp)
3170                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3171                        else
3172                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3173                        break;
3174                    case 16: /* fuito */
3175                        gen_vfp_uito(dp);
3176                        break;
3177                    case 17: /* fsito */
3178                        gen_vfp_sito(dp);
3179                        break;
3180                    case 20: /* fshto */
3181                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3182                          return 1;
3183                        gen_vfp_shto(dp, 16 - rm);
3184                        break;
3185                    case 21: /* fslto */
3186                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3187                          return 1;
3188                        gen_vfp_slto(dp, 32 - rm);
3189                        break;
3190                    case 22: /* fuhto */
3191                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3192                          return 1;
3193                        gen_vfp_uhto(dp, 16 - rm);
3194                        break;
3195                    case 23: /* fulto */
3196                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3197                          return 1;
3198                        gen_vfp_ulto(dp, 32 - rm);
3199                        break;
3200                    case 24: /* ftoui */
3201                        gen_vfp_toui(dp);
3202                        break;
3203                    case 25: /* ftouiz */
3204                        gen_vfp_touiz(dp);
3205                        break;
3206                    case 26: /* ftosi */
3207                        gen_vfp_tosi(dp);
3208                        break;
3209                    case 27: /* ftosiz */
3210                        gen_vfp_tosiz(dp);
3211                        break;
3212                    case 28: /* ftosh */
3213                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3214                          return 1;
3215                        gen_vfp_tosh(dp, 16 - rm);
3216                        break;
3217                    case 29: /* ftosl */
3218                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3219                          return 1;
3220                        gen_vfp_tosl(dp, 32 - rm);
3221                        break;
3222                    case 30: /* ftouh */
3223                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3224                          return 1;
3225                        gen_vfp_touh(dp, 16 - rm);
3226                        break;
3227                    case 31: /* ftoul */
3228                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3229                          return 1;
3230                        gen_vfp_toul(dp, 32 - rm);
3231                        break;
3232                    default: /* undefined */
3233                        printf ("rn:%d\n", rn);
3234                        return 1;
3235                    }
3236                    break;
3237                default: /* undefined */
3238                    printf ("op:%d\n", op);
3239                    return 1;
3240                }
3241
3242                /* Write back the result.  */
3243                if (op == 15 && (rn >= 8 && rn <= 11))
3244                    ; /* Comparison, do nothing.  */
3245                else if (op == 15 && rn > 17)
3246                    /* Integer result.  */
3247                    gen_mov_vreg_F0(0, rd);
3248                else if (op == 15 && rn == 15)
3249                    /* conversion */
3250                    gen_mov_vreg_F0(!dp, rd);
3251                else
3252                    gen_mov_vreg_F0(dp, rd);
3253
3254                /* break out of the loop if we have finished  */
3255                if (veclen == 0)
3256                    break;
3257
3258                if (op == 15 && delta_m == 0) {
3259                    /* single source one-many */
3260                    while (veclen--) {
3261                        rd = ((rd + delta_d) & (bank_mask - 1))
3262                             | (rd & bank_mask);
3263                        gen_mov_vreg_F0(dp, rd);
3264                    }
3265                    break;
3266                }
3267                /* Setup the next operands.  */
3268                veclen--;
3269                rd = ((rd + delta_d) & (bank_mask - 1))
3270                     | (rd & bank_mask);
3271
3272                if (op == 15) {
3273                    /* One source operand.  */
3274                    rm = ((rm + delta_m) & (bank_mask - 1))
3275                         | (rm & bank_mask);
3276                    gen_mov_F0_vreg(dp, rm);
3277                } else {
3278                    /* Two source operands.  */
3279                    rn = ((rn + delta_d) & (bank_mask - 1))
3280                         | (rn & bank_mask);
3281                    gen_mov_F0_vreg(dp, rn);
3282                    if (delta_m) {
3283                        rm = ((rm + delta_m) & (bank_mask - 1))
3284                             | (rm & bank_mask);
3285                        gen_mov_F1_vreg(dp, rm);
3286                    }
3287                }
3288            }
3289        }
3290        break;
3291    case 0xc:
3292    case 0xd:
3293        if (dp && (insn & 0x03e00000) == 0x00400000) {
3294            /* two-register transfer */
3295            rn = (insn >> 16) & 0xf;
3296            rd = (insn >> 12) & 0xf;
3297            if (dp) {
3298                VFP_DREG_M(rm, insn);
3299            } else {
3300                rm = VFP_SREG_M(insn);
3301            }
3302
3303            if (insn & ARM_CP_RW_BIT) {
3304                /* vfp->arm */
3305                if (dp) {
3306                    gen_mov_F0_vreg(0, rm * 2);
3307                    tmp = gen_vfp_mrs();
3308                    store_reg(s, rd, tmp);
3309                    gen_mov_F0_vreg(0, rm * 2 + 1);
3310                    tmp = gen_vfp_mrs();
3311                    store_reg(s, rn, tmp);
3312                } else {
3313                    gen_mov_F0_vreg(0, rm);
3314                    tmp = gen_vfp_mrs();
3315                    store_reg(s, rn, tmp);
3316                    gen_mov_F0_vreg(0, rm + 1);
3317                    tmp = gen_vfp_mrs();
3318                    store_reg(s, rd, tmp);
3319                }
3320            } else {
3321                /* arm->vfp */
3322                if (dp) {
3323                    tmp = load_reg(s, rd);
3324                    gen_vfp_msr(tmp);
3325                    gen_mov_vreg_F0(0, rm * 2);
3326                    tmp = load_reg(s, rn);
3327                    gen_vfp_msr(tmp);
3328                    gen_mov_vreg_F0(0, rm * 2 + 1);
3329                } else {
3330                    tmp = load_reg(s, rn);
3331                    gen_vfp_msr(tmp);
3332                    gen_mov_vreg_F0(0, rm);
3333                    tmp = load_reg(s, rd);
3334                    gen_vfp_msr(tmp);
3335                    gen_mov_vreg_F0(0, rm + 1);
3336                }
3337            }
3338        } else {
3339            /* Load/store */
3340            rn = (insn >> 16) & 0xf;
3341            if (dp)
3342                VFP_DREG_D(rd, insn);
3343            else
3344                rd = VFP_SREG_D(insn);
3345            if (s->thumb && rn == 15) {
3346                gen_op_movl_T1_im(s->pc & ~2);
3347            } else {
3348                gen_movl_T1_reg(s, rn);
3349            }
3350            if ((insn & 0x01200000) == 0x01000000) {
3351                /* Single load/store */
3352                offset = (insn & 0xff) << 2;
3353                if ((insn & (1 << 23)) == 0)
3354                    offset = -offset;
3355                gen_op_addl_T1_im(offset);
3356                if (insn & (1 << 20)) {
3357                    gen_vfp_ld(s, dp);
3358                    gen_mov_vreg_F0(dp, rd);
3359                } else {
3360                    gen_mov_F0_vreg(dp, rd);
3361                    gen_vfp_st(s, dp);
3362                }
3363            } else {
3364                /* load/store multiple */
3365                if (dp)
3366                    n = (insn >> 1) & 0x7f;
3367                else
3368                    n = insn & 0xff;
3369
3370                if (insn & (1 << 24)) /* pre-decrement */
3371                    gen_op_addl_T1_im(-((insn & 0xff) << 2));
3372
3373                if (dp)
3374                    offset = 8;
3375                else
3376                    offset = 4;
3377                for (i = 0; i < n; i++) {
3378                    if (insn & ARM_CP_RW_BIT) {
3379                        /* load */
3380                        gen_vfp_ld(s, dp);
3381                        gen_mov_vreg_F0(dp, rd + i);
3382                    } else {
3383                        /* store */
3384                        gen_mov_F0_vreg(dp, rd + i);
3385                        gen_vfp_st(s, dp);
3386                    }
3387                    gen_op_addl_T1_im(offset);
3388                }
3389                if (insn & (1 << 21)) {
3390                    /* writeback */
3391                    if (insn & (1 << 24))
3392                        offset = -offset * n;
3393                    else if (dp && (insn & 1))
3394                        offset = 4;
3395                    else
3396                        offset = 0;
3397
3398                    if (offset != 0)
3399                        gen_op_addl_T1_im(offset);
3400                    gen_movl_reg_T1(s, rn);
3401                }
3402            }
3403        }
3404        break;
3405    default:
3406        /* Should never happen.  */
3407        return 1;
3408    }
3409    return 0;
3410}
3411
3412static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
3413{
3414    TranslationBlock *tb;
3415
3416    tb = s->tb;
3417    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3418        tcg_gen_goto_tb(n);
3419        gen_set_pc_im(dest);
3420        tcg_gen_exit_tb((long)tb + n);
3421    } else {
3422        gen_set_pc_im(dest);
3423        tcg_gen_exit_tb(0);
3424    }
3425}
3426
3427static inline void gen_jmp (DisasContext *s, uint32_t dest)
3428{
3429    if (unlikely(s->singlestep_enabled)) {
3430        /* An indirect jump so that we still trigger the debug exception.  */
3431        if (s->thumb)
3432            dest |= 1;
3433        gen_bx_im(s, dest);
3434    } else {
3435        gen_goto_tb(s, 0, dest);
3436        s->is_jmp = DISAS_TB_JUMP;
3437    }
3438}
3439
3440static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
3441{
3442    if (x)
3443        tcg_gen_sari_i32(t0, t0, 16);
3444    else
3445        gen_sxth(t0);
3446    if (y)
3447        tcg_gen_sari_i32(t1, t1, 16);
3448    else
3449        gen_sxth(t1);
3450    tcg_gen_mul_i32(t0, t0, t1);
3451}
3452
3453/* Return the mask of PSR bits set by a MSR instruction.  */
3454static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
3455    uint32_t mask;
3456
3457    mask = 0;
3458    if (flags & (1 << 0))
3459        mask |= 0xff;
3460    if (flags & (1 << 1))
3461        mask |= 0xff00;
3462    if (flags & (1 << 2))
3463        mask |= 0xff0000;
3464    if (flags & (1 << 3))
3465        mask |= 0xff000000;
3466
3467    /* Mask out undefined bits.  */
3468    mask &= ~CPSR_RESERVED;
3469    if (!arm_feature(env, ARM_FEATURE_V6))
3470        mask &= ~(CPSR_E | CPSR_GE);
3471    if (!arm_feature(env, ARM_FEATURE_THUMB2))
3472        mask &= ~CPSR_IT;
3473    /* Mask out execution state bits.  */
3474    if (!spsr)
3475        mask &= ~CPSR_EXEC;
3476    /* Mask out privileged bits.  */
3477    if (IS_USER(s))
3478        mask &= CPSR_USER;
3479    return mask;
3480}
3481
/* Returns nonzero if access to the PSR is not permitted.
   Writes the bits of cpu_T[0] selected by 'mask' into the CPSR, or
   into the banked SPSR when 'spsr' is nonzero.  NOTE: cpu_T[0] is
   ANDed with 'mask' in place as a side effect of the SPSR path.  */
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
{
    TCGv tmp;
    if (spsr) {
        /* ??? This is also undefined in system mode.  */
        if (IS_USER(s))
            return 1;

        /* Merge: spsr = (spsr & ~mask) | (T0 & mask).  */
        tmp = load_cpu_field(spsr);
        tcg_gen_andi_i32(tmp, tmp, ~mask);
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
        tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
        store_cpu_field(tmp, spsr);
    } else {
        gen_set_cpsr(cpu_T[0], mask);
    }
    /* A PSR write may change execution state, so end this TB.  */
    gen_lookup_tb(s);
    return 0;
}
3502
3503/* Generate an old-style exception return. Marks pc as dead. */
3504static void gen_exception_return(DisasContext *s, TCGv pc)
3505{
3506    TCGv tmp;
3507    store_reg(s, 15, pc);
3508    tmp = load_cpu_field(spsr);
3509    gen_set_cpsr(tmp, 0xffffffff);
3510    dead_tmp(tmp);
3511    s->is_jmp = DISAS_UPDATE;
3512}
3513
/* Generate a v6 exception return.  Marks both values as dead.
   'pc' and 'cpsr' are temporaries holding the values loaded by RFE;
   all CPSR fields are written, then the PC.  */
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
{
    gen_set_cpsr(cpsr, 0xffffffff);
    dead_tmp(cpsr);
    store_reg(s, 15, pc);
    /* CPU state changed: end this TB and re-dispatch.  */
    s->is_jmp = DISAS_UPDATE;
}
3522
3523static inline void
3524gen_set_condexec (DisasContext *s)
3525{
3526    if (s->condexec_mask) {
3527        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3528        TCGv tmp = new_tmp();
3529        tcg_gen_movi_i32(tmp, val);
3530        store_cpu_field(tmp, condexec_bits);
3531    }
3532    else if (s->condexec_mask_prev != 0) {
3533        TCGv tmp = new_tmp();
3534        tcg_gen_movi_i32(tmp, 0);
3535        store_cpu_field(tmp, condexec_bits);
3536    }
3537}
3538
3539static void gen_nop_hint(DisasContext *s, int val)
3540{
3541    switch (val) {
3542    case 3: /* wfi */
3543        gen_set_pc_im(s->pc);
3544        s->is_jmp = DISAS_WFI;
3545        break;
3546    case 2: /* wfe */
3547    case 4: /* sev */
3548        /* TODO: Implement SEV and WFE.  May help SMP performance.  */
3549    default: /* nop */
3550        break;
3551    }
3552}
3553
/* These macros help make the code more readable when migrating from the
   old dyngen helpers.  They should probably be removed when
   T0/T1 are removed.  */
/* Argument bundle for helpers of the form T0 = op(T0, T1).  */
#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]
/* As above, but also passing cpu_env for helpers needing CPU state.  */
#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]

/* Argument bundle for 64-bit ops of the form V0 = op(V0, V1).  */
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3561
3562static inline int gen_neon_add(int size)
3563{
3564    switch (size) {
3565    case 0: gen_helper_neon_add_u8(CPU_T001); break;
3566    case 1: gen_helper_neon_add_u16(CPU_T001); break;
3567    case 2: gen_op_addl_T0_T1(); break;
3568    default: return 1;
3569    }
3570    return 0;
3571}
3572
3573static inline void gen_neon_rsb(int size)
3574{
3575    switch (size) {
3576    case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3577    case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3578    case 2: gen_op_rsbl_T0_T1(); break;
3579    default: return;
3580    }
3581}
3582
/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
#define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
#define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
#define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32

/* FIXME: This is wrong.  They set the wrong overflow bit.  */
/* Map the 32-bit NEON saturating ops onto the ARM saturating
   arithmetic helpers; the env argument 'e' is dropped.  */
#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c)
#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c)
#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c)
#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c)

/* Emit T0 = neon_<name>(T0, T1), selecting the helper variant from the
   surrounding 'size' (0/1/2 for 8/16/32-bit elements) and 'u' (unsigned)
   locals.  Expands to 'return 1' on an invalid encoding, so this may
   only be used inside a function returning int.  This variant passes
   cpu_env for helpers that need CPU state.  */
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    default: return 1; \
    }} while (0)

/* Same as GEN_NEON_INTEGER_OP_ENV but for helpers without an env
   argument.  */
#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    default: return 1; \
    }} while (0)
3640
3641static inline void
3642gen_neon_movl_scratch_T0(int scratch)
3643{
3644  uint32_t offset;
3645
3646  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3647  tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
3648}
3649
3650static inline void
3651gen_neon_movl_scratch_T1(int scratch)
3652{
3653  uint32_t offset;
3654
3655  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3656  tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
3657}
3658
3659static inline void
3660gen_neon_movl_T0_scratch(int scratch)
3661{
3662  uint32_t offset;
3663
3664  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3665  tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
3666}
3667
3668static inline void
3669gen_neon_movl_T1_scratch(int scratch)
3670{
3671  uint32_t offset;
3672
3673  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3674  tcg_gen_ld_i32(cpu_T[1], cpu_env, offset);
3675}
3676
3677static inline void gen_neon_get_scalar(int size, int reg)
3678{
3679    if (size == 1) {
3680        NEON_GET_REG(T0, reg >> 1, reg & 1);
3681    } else {
3682        NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
3683        if (reg & 1)
3684            gen_neon_dup_low16(cpu_T[0]);
3685        else
3686            gen_neon_dup_high16(cpu_T[0]);
3687    }
3688}
3689
3690static void gen_neon_unzip(int reg, int q, int tmp, int size)
3691{
3692    int n;
3693
3694    for (n = 0; n < q + 1; n += 2) {
3695        NEON_GET_REG(T0, reg, n);
3696        NEON_GET_REG(T0, reg, n + n);
3697        switch (size) {
3698        case 0: gen_helper_neon_unzip_u8(); break;
3699        case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same.  */
3700        case 2: /* no-op */; break;
3701        default: abort();
3702        }
3703        gen_neon_movl_scratch_T0(tmp + n);
3704        gen_neon_movl_scratch_T1(tmp + n + 1);
3705    }
3706}
3707
/* Layout table for NEON "load/store multiple elements" instructions,
   indexed by the 4-bit op field (values > 10 are invalid).
   NOTE(review): could be declared const — verify no writers.  */
static struct {
    int nregs;       /* number of D registers transferred */
    int interleave;  /* element interleave factor across registers */
    int spacing;     /* register spacing: 1 = consecutive, 2 = every other */
} neon_ls_element_type[11] = {
    {4, 4, 1},
    {4, 4, 2},
    {4, 1, 1},
    {4, 2, 1},
    {3, 3, 1},
    {3, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {2, 2, 1},
    {2, 2, 2},
    {2, 1, 1}
};
3725
/* Translate a NEON load/store element instruction.  Return nonzero if the
   instruction is invalid.  Three encodings are handled: load/store of
   multiple structures, load of one element to all lanes, and load/store
   of a single element to one lane.  Addresses are computed in cpu_T[1];
   rn is the base register and rm selects the writeback behaviour.  */
static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
    int rd, rn, rm;
    int op;
    int nregs;       /* number of D registers transferred */
    int interleave;  /* element interleave factor from the type table */
    int stride;      /* address step, later reused as writeback amount */
    int size;        /* log2 of the element size in bytes */
    int reg;
    int pass;        /* which 32-bit half of the current D register */
    int load;        /* nonzero for a load, zero for a store */
    int shift;
    int n;
    TCGv tmp;
    TCGv tmp2;

    /* These instructions are treated as invalid when the VFP/NEON unit
       is disabled.  */
    if (!vfp_enabled(env))
      return 1;
    VFP_DREG_D(rd, insn);
    rn = (insn >> 16) & 0xf;    /* base address register */
    rm = insn & 0xf;            /* index register; 13 and 15 are special */
    load = (insn & (1 << 21)) != 0;
    if ((insn & (1 << 23)) == 0) {
        /* Load store all elements.  */
        op = (insn >> 8) & 0xf;
        size = (insn >> 6) & 3;
        /* op values above 10 and 64-bit elements are invalid here.  */
        if (op > 10 || size == 3)
            return 1;
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        gen_movl_T1_reg(s, rn);     /* T1 = base address */
        stride = (1 << size) * interleave;
        for (reg = 0; reg < nregs; reg++) {
            /* For interleaved layouts each register's elements start at
               a different offset from the base address.  */
            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im((1 << size) * reg);
            } else if (interleave == 2 && nregs == 4 && reg == 2) {
                gen_movl_T1_reg(s, rn);
                gen_op_addl_T1_im(1 << size);
            }
            /* Each D register is transferred as two 32-bit halves.  */
            for (pass = 0; pass < 2; pass++) {
                if (size == 2) {
                    /* 32-bit elements: one access per pass.  */
                    if (load) {
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        neon_store_reg(rd, pass, tmp);
                    } else {
                        tmp = neon_load_reg(rd, pass);
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                    }
                    gen_op_addl_T1_im(stride);
                } else if (size == 1) {
                    /* 16-bit elements: two accesses per pass, packed
                       into/unpacked from one 32-bit register half.  */
                    if (load) {
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
                        dead_tmp(tmp2);
                        neon_store_reg(rd, pass, tmp);
                    } else {
                        tmp = neon_load_reg(rd, pass);
                        tmp2 = new_tmp();
                        tcg_gen_shri_i32(tmp2, tmp, 16);
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                        gen_st16(tmp2, cpu_T[1], IS_USER(s));
                        gen_op_addl_T1_im(stride);
                    }
                } else /* size == 0 */ {
                    /* 8-bit elements: four accesses per pass.  */
                    if (load) {
                        TCGV_UNUSED(tmp2);
                        for (n = 0; n < 4; n++) {
                            tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                            gen_op_addl_T1_im(stride);
                            if (n == 0) {
                                tmp2 = tmp;
                            } else {
                                /* Insert byte n into the accumulator.  */
                                gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
                                dead_tmp(tmp);
                            }
                        }
                        neon_store_reg(rd, pass, tmp2);
                    } else {
                        tmp2 = neon_load_reg(rd, pass);
                        for (n = 0; n < 4; n++) {
                            tmp = new_tmp();
                            if (n == 0) {
                                tcg_gen_mov_i32(tmp, tmp2);
                            } else {
                                /* Extract byte n for the store.  */
                                tcg_gen_shri_i32(tmp, tmp2, n * 8);
                            }
                            gen_st8(tmp, cpu_T[1], IS_USER(s));
                            gen_op_addl_T1_im(stride);
                        }
                        dead_tmp(tmp2);
                    }
                }
            }
            rd += neon_ls_element_type[op].spacing;
        }
        /* Writeback amount for the post-indexed forms handled below.  */
        stride = nregs * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {
            /* Load single element to all lanes.  */
            if (!load)
                return 1;   /* the store form is invalid */
            size = (insn >> 6) & 3;
            nregs = ((insn >> 8) & 3) + 1;
            stride = (insn & (1 << 5)) ? 2 : 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                /* Load one element and replicate it across the lanes of
                   a 32-bit value.  */
                switch (size) {
                case 0:
                    tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                    gen_neon_dup_u8(tmp, 0);
                    break;
                case 1:
                    tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                    gen_neon_dup_low16(tmp);
                    break;
                case 2:
                    tmp = gen_ld32(cpu_T[1], IS_USER(s));
                    break;
                case 3:
                    return 1;
                default: /* Avoid compiler warnings.  */
                    abort();
                }
                gen_op_addl_T1_im(1 << size);
                /* Write the replicated value to both halves of the
                   D register.  */
                tmp2 = new_tmp();
                tcg_gen_mov_i32(tmp2, tmp);
                neon_store_reg(rd, 0, tmp2);
                neon_store_reg(rd, 1, tmp);
                rd += stride;
            }
            stride = (1 << size) * nregs;
        } else {
            /* Single element.  */
            pass = (insn >> 7) & 1;
            /* Decode the lane into a (pass, shift) pair.  */
            switch (size) {
            case 0:
                shift = ((insn >> 5) & 3) * 8;
                stride = 1;
                break;
            case 1:
                shift = ((insn >> 6) & 1) * 16;
                stride = (insn & (1 << 5)) ? 2 : 1;
                break;
            case 2:
                shift = 0;
                stride = (insn & (1 << 6)) ? 2 : 1;
                break;
            default:
                abort();
            }
            nregs = ((insn >> 8) & 3) + 1;
            gen_movl_T1_reg(s, rn);
            for (reg = 0; reg < nregs; reg++) {
                if (load) {
                    switch (size) {
                    case 0:
                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
                        break;
                    default: /* Avoid compiler warnings.  */
                        abort();
                    }
                    if (size != 2) {
                        /* Merge the loaded element into the existing
                           register contents at the lane position.  */
                        tmp2 = neon_load_reg(rd, pass);
                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
                        dead_tmp(tmp2);
                    }
                    neon_store_reg(rd, pass, tmp);
                } else { /* Store */
                    tmp = neon_load_reg(rd, pass);
                    if (shift)
                        tcg_gen_shri_i32(tmp, tmp, shift);
                    switch (size) {
                    case 0:
                        gen_st8(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 1:
                        gen_st16(tmp, cpu_T[1], IS_USER(s));
                        break;
                    case 2:
                        gen_st32(tmp, cpu_T[1], IS_USER(s));
                        break;
                    }
                }
                rd += stride;
                gen_op_addl_T1_im(1 << size);
            }
            stride = nregs * (1 << size);
        }
    }
    /* Base register writeback: rm == 15 means none, rm == 13 means
       post-index by the transfer size, any other rm post-indexes by
       that register's value.  */
    if (rm != 15) {
        TCGv base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            dead_tmp(index);
        }
        store_reg(s, rn, base);
    }
    return 0;
}
3945
3946/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
3947static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
3948{
3949    tcg_gen_and_i32(t, t, c);
3950    tcg_gen_bic_i32(f, f, c);
3951    tcg_gen_or_i32(dest, t, f);
3952}
3953
3954static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
3955{
3956    switch (size) {
3957    case 0: gen_helper_neon_narrow_u8(dest, src); break;
3958    case 1: gen_helper_neon_narrow_u16(dest, src); break;
3959    case 2: tcg_gen_trunc_i64_i32(dest, src); break;
3960    default: abort();
3961    }
3962}
3963
3964static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
3965{
3966    switch (size) {
3967    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3968    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3969    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3970    default: abort();
3971    }
3972}
3973
3974static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
3975{
3976    switch (size) {
3977    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3978    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3979    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3980    default: abort();
3981    }
3982}
3983
3984static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
3985                                         int q, int u)
3986{
3987    if (q) {
3988        if (u) {
3989            switch (size) {
3990            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3991            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3992            default: abort();
3993            }
3994        } else {
3995            switch (size) {
3996            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3997            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3998            default: abort();
3999            }
4000        }
4001    } else {
4002        if (u) {
4003            switch (size) {
4004            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4005            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4006            default: abort();
4007            }
4008        } else {
4009            switch (size) {
4010            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4011            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4012            default: abort();
4013            }
4014        }
4015    }
4016}
4017
4018static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
4019{
4020    if (u) {
4021        switch (size) {
4022        case 0: gen_helper_neon_widen_u8(dest, src); break;
4023        case 1: gen_helper_neon_widen_u16(dest, src); break;
4024        case 2: tcg_gen_extu_i32_i64(dest, src); break;
4025        default: abort();
4026        }
4027    } else {
4028        switch (size) {
4029        case 0: gen_helper_neon_widen_s8(dest, src); break;
4030        case 1: gen_helper_neon_widen_s16(dest, src); break;
4031        case 2: tcg_gen_ext_i32_i64(dest, src); break;
4032        default: abort();
4033        }
4034    }
4035    dead_tmp(src);
4036}
4037
4038static inline void gen_neon_addl(int size)
4039{
4040    switch (size) {
4041    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4042    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4043    case 2: tcg_gen_add_i64(CPU_V001); break;
4044    default: abort();
4045    }
4046}
4047
4048static inline void gen_neon_subl(int size)
4049{
4050    switch (size) {
4051    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4052    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4053    case 2: tcg_gen_sub_i64(CPU_V001); break;
4054    default: abort();
4055    }
4056}
4057
4058static inline void gen_neon_negl(TCGv_i64 var, int size)
4059{
4060    switch (size) {
4061    case 0: gen_helper_neon_negl_u16(var, var); break;
4062    case 1: gen_helper_neon_negl_u32(var, var); break;
4063    case 2: gen_helper_neon_negl_u64(var, var); break;
4064    default: abort();
4065    }
4066}
4067
4068static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4069{
4070    switch (size) {
4071    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4072    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4073    default: abort();
4074    }
4075}
4076
4077static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4078{
4079    TCGv_i64 tmp;
4080
4081    switch ((size << 1) | u) {
4082    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4083    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4084    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4085    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4086    case 4:
4087        tmp = gen_muls_i64_i32(a, b);
4088        tcg_gen_mov_i64(dest, tmp);
4089        break;
4090    case 5:
4091        tmp = gen_mulu_i64_i32(a, b);
4092        tcg_gen_mov_i64(dest, tmp);
4093        break;
4094    default: abort();
4095    }
4096    if (size < 2) {
4097        dead_tmp(b);
4098        dead_tmp(a);
4099    }
4100}
4101
4102/* Translate a NEON data processing instruction.  Return nonzero if the
4103   instruction is invalid.
4104   We process data in a mixture of 32-bit and 64-bit chunks.
4105   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
4106
4107static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4108{
4109    int op;
4110    int q;
4111    int rd, rn, rm;
4112    int size;
4113    int shift;
4114    int pass;
4115    int count;
4116    int pairwise;
4117    int u;
4118    int n;
4119    uint32_t imm;
4120    TCGv tmp;
4121    TCGv tmp2;
4122    TCGv tmp3;
4123    TCGv_i64 tmp64;
4124
4125    if (!vfp_enabled(env))
4126      return 1;
4127    q = (insn & (1 << 6)) != 0;
4128    u = (insn >> 24) & 1;
4129    VFP_DREG_D(rd, insn);
4130    VFP_DREG_N(rn, insn);
4131    VFP_DREG_M(rm, insn);
4132    size = (insn >> 20) & 3;
4133    if ((insn & (1 << 23)) == 0) {
4134        /* Three register same length.  */
4135        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4136        if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
4137                          || op == 10 || op  == 11 || op == 16)) {
4138            /* 64-bit element instructions.  */
4139            for (pass = 0; pass < (q ? 2 : 1); pass++) {
4140                neon_load_reg64(cpu_V0, rn + pass);
4141                neon_load_reg64(cpu_V1, rm + pass);
4142                switch (op) {
4143                case 1: /* VQADD */
4144                    if (u) {
4145                        gen_helper_neon_add_saturate_u64(CPU_V001);
4146                    } else {
4147                        gen_helper_neon_add_saturate_s64(CPU_V001);
4148                    }
4149                    break;
4150                case 5: /* VQSUB */
4151                    if (u) {
4152                        gen_helper_neon_sub_saturate_u64(CPU_V001);
4153                    } else {
4154                        gen_helper_neon_sub_saturate_s64(CPU_V001);
4155                    }
4156                    break;
4157                case 8: /* VSHL */
4158                    if (u) {
4159                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4160                    } else {
4161                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4162                    }
4163                    break;
4164                case 9: /* VQSHL */
4165                    if (u) {
4166                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4167                                                 cpu_V0, cpu_V0);
4168                    } else {
4169                        gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
4170                                                 cpu_V1, cpu_V0);
4171                    }
4172                    break;
4173                case 10: /* VRSHL */
4174                    if (u) {
4175                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4176                    } else {
4177                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4178                    }
4179                    break;
4180                case 11: /* VQRSHL */
4181                    if (u) {
4182                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4183                                                  cpu_V1, cpu_V0);
4184                    } else {
4185                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4186                                                  cpu_V1, cpu_V0);
4187                    }
4188                    break;
4189                case 16:
4190                    if (u) {
4191                        tcg_gen_sub_i64(CPU_V001);
4192                    } else {
4193                        tcg_gen_add_i64(CPU_V001);
4194                    }
4195                    break;
4196                default:
4197                    abort();
4198                }
4199                neon_store_reg64(cpu_V0, rd + pass);
4200            }
4201            return 0;
4202        }
4203        switch (op) {
4204        case 8: /* VSHL */
4205        case 9: /* VQSHL */
4206        case 10: /* VRSHL */
4207        case 11: /* VQRSHL */
4208            {
4209                int rtmp;
4210                /* Shift instruction operands are reversed.  */
4211                rtmp = rn;
4212                rn = rm;
4213                rm = rtmp;
4214                pairwise = 0;
4215            }
4216            break;
4217        case 20: /* VPMAX */
4218        case 21: /* VPMIN */
4219        case 23: /* VPADD */
4220            pairwise = 1;
4221            break;
4222        case 26: /* VPADD (float) */
4223            pairwise = (u && size < 2);
4224            break;
4225        case 30: /* VPMIN/VPMAX (float) */
4226            pairwise = u;
4227            break;
4228        default:
4229            pairwise = 0;
4230            break;
4231        }
4232        for (pass = 0; pass < (q ? 4 : 2); pass++) {
4233
4234        if (pairwise) {
4235            /* Pairwise.  */
4236            if (q)
4237                n = (pass & 1) * 2;
4238            else
4239                n = 0;
4240            if (pass < q + 1) {
4241                NEON_GET_REG(T0, rn, n);
4242                NEON_GET_REG(T1, rn, n + 1);
4243            } else {
4244                NEON_GET_REG(T0, rm, n);
4245                NEON_GET_REG(T1, rm, n + 1);
4246            }
4247        } else {
4248            /* Elementwise.  */
4249            NEON_GET_REG(T0, rn, pass);
4250            NEON_GET_REG(T1, rm, pass);
4251        }
4252        switch (op) {
4253        case 0: /* VHADD */
4254            GEN_NEON_INTEGER_OP(hadd);
4255            break;
4256        case 1: /* VQADD */
4257            GEN_NEON_INTEGER_OP_ENV(qadd);
4258            break;
4259        case 2: /* VRHADD */
4260            GEN_NEON_INTEGER_OP(rhadd);
4261            break;
4262        case 3: /* Logic ops.  */
4263            switch ((u << 2) | size) {
4264            case 0: /* VAND */
4265                gen_op_andl_T0_T1();
4266                break;
4267            case 1: /* BIC */
4268                gen_op_bicl_T0_T1();
4269                break;
4270            case 2: /* VORR */
4271                gen_op_orl_T0_T1();
4272                break;
4273            case 3: /* VORN */
4274                gen_op_notl_T1();
4275                gen_op_orl_T0_T1();
4276                break;
4277            case 4: /* VEOR */
4278                gen_op_xorl_T0_T1();
4279                break;
4280            case 5: /* VBSL */
4281                tmp = neon_load_reg(rd, pass);
4282                gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
4283                dead_tmp(tmp);
4284                break;
4285            case 6: /* VBIT */
4286                tmp = neon_load_reg(rd, pass);
4287                gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
4288                dead_tmp(tmp);
4289                break;
4290            case 7: /* VBIF */
4291                tmp = neon_load_reg(rd, pass);
4292                gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
4293                dead_tmp(tmp);
4294                break;
4295            }
4296            break;
4297        case 4: /* VHSUB */
4298            GEN_NEON_INTEGER_OP(hsub);
4299            break;
4300        case 5: /* VQSUB */
4301            GEN_NEON_INTEGER_OP_ENV(qsub);
4302            break;
4303        case 6: /* VCGT */
4304            GEN_NEON_INTEGER_OP(cgt);
4305            break;
4306        case 7: /* VCGE */
4307            GEN_NEON_INTEGER_OP(cge);
4308            break;
4309        case 8: /* VSHL */
4310            GEN_NEON_INTEGER_OP(shl);
4311            break;
4312        case 9: /* VQSHL */
4313            GEN_NEON_INTEGER_OP_ENV(qshl);
4314            break;
4315        case 10: /* VRSHL */
4316            GEN_NEON_INTEGER_OP(rshl);
4317            break;
4318        case 11: /* VQRSHL */
4319            GEN_NEON_INTEGER_OP_ENV(qrshl);
4320            break;
4321        case 12: /* VMAX */
4322            GEN_NEON_INTEGER_OP(max);
4323            break;
4324        case 13: /* VMIN */
4325            GEN_NEON_INTEGER_OP(min);
4326            break;
4327        case 14: /* VABD */
4328            GEN_NEON_INTEGER_OP(abd);
4329            break;
4330        case 15: /* VABA */
4331            GEN_NEON_INTEGER_OP(abd);
4332            NEON_GET_REG(T1, rd, pass);
4333            gen_neon_add(size);
4334            break;
4335        case 16:
4336            if (!u) { /* VADD */
4337                if (gen_neon_add(size))
4338                    return 1;
4339            } else { /* VSUB */
4340                switch (size) {
4341                case 0: gen_helper_neon_sub_u8(CPU_T001); break;
4342                case 1: gen_helper_neon_sub_u16(CPU_T001); break;
4343                case 2: gen_op_subl_T0_T1(); break;
4344                default: return 1;
4345                }
4346            }
4347            break;
4348        case 17:
4349            if (!u) { /* VTST */
4350                switch (size) {
4351                case 0: gen_helper_neon_tst_u8(CPU_T001); break;
4352                case 1: gen_helper_neon_tst_u16(CPU_T001); break;
4353                case 2: gen_helper_neon_tst_u32(CPU_T001); break;
4354                default: return 1;
4355                }
4356            } else { /* VCEQ */
4357                switch (size) {
4358                case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
4359                case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
4360                case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
4361                default: return 1;
4362                }
4363            }
4364            break;
4365        case 18: /* Multiply.  */
4366            switch (size) {
4367            case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4368            case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4369            case 2: gen_op_mul_T0_T1(); break;
4370            default: return 1;
4371            }
4372            NEON_GET_REG(T1, rd, pass);
4373            if (u) { /* VMLS */
4374                gen_neon_rsb(size);
4375            } else { /* VMLA */
4376                gen_neon_add(size);
4377            }
4378            break;
4379        case 19: /* VMUL */
4380            if (u) { /* polynomial */
4381                gen_helper_neon_mul_p8(CPU_T001);
4382            } else { /* Integer */
4383                switch (size) {
4384                case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4385                case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4386                case 2: gen_op_mul_T0_T1(); break;
4387                default: return 1;
4388                }
4389            }
4390            break;
4391        case 20: /* VPMAX */
4392            GEN_NEON_INTEGER_OP(pmax);
4393            break;
4394        case 21: /* VPMIN */
4395            GEN_NEON_INTEGER_OP(pmin);
4396            break;
4397        case 22: /* Hultiply high.  */
4398            if (!u) { /* VQDMULH */
4399                switch (size) {
4400                case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
4401                case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
4402                default: return 1;
4403                }
4404            } else { /* VQRDHMUL */
4405                switch (size) {
4406                case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
4407                case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
4408                default: return 1;
4409                }
4410            }
4411            break;
4412        case 23: /* VPADD */
4413            if (u)
4414                return 1;
4415            switch (size) {
4416            case 0: gen_helper_neon_padd_u8(CPU_T001); break;
4417            case 1: gen_helper_neon_padd_u16(CPU_T001); break;
4418            case 2: gen_op_addl_T0_T1(); break;
4419            default: return 1;
4420            }
4421            break;
4422        case 26: /* Floating point arithnetic.  */
4423            switch ((u << 2) | size) {
4424            case 0: /* VADD */
4425                gen_helper_neon_add_f32(CPU_T001);
4426                break;
4427            case 2: /* VSUB */
4428                gen_helper_neon_sub_f32(CPU_T001);
4429                break;
4430            case 4: /* VPADD */
4431                gen_helper_neon_add_f32(CPU_T001);
4432                break;
4433            case 6: /* VABD */
4434                gen_helper_neon_abd_f32(CPU_T001);
4435                break;
4436            default:
4437                return 1;
4438            }
4439            break;
4440        case 27: /* Float multiply.  */
4441            gen_helper_neon_mul_f32(CPU_T001);
4442            if (!u) {
4443                NEON_GET_REG(T1, rd, pass);
4444                if (size == 0) {
4445                    gen_helper_neon_add_f32(CPU_T001);
4446                } else {
4447                    gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
4448                }
4449            }
4450            break;
4451        case 28: /* Float compare.  */
4452            if (!u) {
4453                gen_helper_neon_ceq_f32(CPU_T001);
4454            } else {
4455                if (size == 0)
4456                    gen_helper_neon_cge_f32(CPU_T001);
4457                else
4458                    gen_helper_neon_cgt_f32(CPU_T001);
4459            }
4460            break;
4461        case 29: /* Float compare absolute.  */
4462            if (!u)
4463                return 1;
4464            if (size == 0)
4465                gen_helper_neon_acge_f32(CPU_T001);
4466            else
4467                gen_helper_neon_acgt_f32(CPU_T001);
4468            break;
4469        case 30: /* Float min/max.  */
4470            if (size == 0)
4471                gen_helper_neon_max_f32(CPU_T001);
4472            else
4473                gen_helper_neon_min_f32(CPU_T001);
4474            break;
4475        case 31:
4476            if (size == 0)
4477                gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4478            else
4479                gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4480            break;
4481        default:
4482            abort();
4483        }
4484        /* Save the result.  For elementwise operations we can put it
4485           straight into the destination register.  For pairwise operations
4486           we have to be careful to avoid clobbering the source operands.  */
4487        if (pairwise && rd == rm) {
4488            gen_neon_movl_scratch_T0(pass);
4489        } else {
4490            NEON_SET_REG(T0, rd, pass);
4491        }
4492
4493        } /* for pass */
4494        if (pairwise && rd == rm) {
4495            for (pass = 0; pass < (q ? 4 : 2); pass++) {
4496                gen_neon_movl_T0_scratch(pass);
4497                NEON_SET_REG(T0, rd, pass);
4498            }
4499        }
4500        /* End of 3 register same size operations.  */
4501    } else if (insn & (1 << 4)) {
4502        if ((insn & 0x00380080) != 0) {
4503            /* Two registers and shift.  */
4504            op = (insn >> 8) & 0xf;
4505            if (insn & (1 << 7)) {
4506                /* 64-bit shift.   */
4507                size = 3;
4508            } else {
4509                size = 2;
4510                while ((insn & (1 << (size + 19))) == 0)
4511                    size--;
4512            }
4513            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
4514            /* To avoid excessive dumplication of ops we implement shift
4515               by immediate using the variable shift operations.  */
4516            if (op < 8) {
4517                /* Shift by immediate:
4518                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
4519                /* Right shifts are encoded as N - shift, where N is the
4520                   element size in bits.  */
4521                if (op <= 4)
4522                    shift = shift - (1 << (size + 3));
4523                if (size == 3) {
4524                    count = q + 1;
4525                } else {
4526                    count = q ? 4: 2;
4527                }
4528                switch (size) {
4529                case 0:
4530                    imm = (uint8_t) shift;
4531                    imm |= imm << 8;
4532                    imm |= imm << 16;
4533                    break;
4534                case 1:
4535                    imm = (uint16_t) shift;
4536                    imm |= imm << 16;
4537                    break;
4538                case 2:
4539                case 3:
4540                    imm = shift;
4541                    break;
4542                default:
4543                    abort();
4544                }
4545
4546                for (pass = 0; pass < count; pass++) {
4547                    if (size == 3) {
4548                        neon_load_reg64(cpu_V0, rm + pass);
4549                        tcg_gen_movi_i64(cpu_V1, imm);
4550                        switch (op) {
4551                        case 0:  /* VSHR */
4552                        case 1:  /* VSRA */
4553                            if (u)
4554                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4555                            else
4556                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
4557                            break;
4558                        case 2: /* VRSHR */
4559                        case 3: /* VRSRA */
4560                            if (u)
4561                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
4562                            else
4563                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
4564                            break;
4565                        case 4: /* VSRI */
4566                            if (!u)
4567                                return 1;
4568                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4569                            break;
4570                        case 5: /* VSHL, VSLI */
4571                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4572                            break;
4573                        case 6: /* VQSHL */
4574                            if (u)
4575                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4576                            else
4577                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4578                            break;
4579                        case 7: /* VQSHLU */
4580                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4581                            break;
4582                        }
4583                        if (op == 1 || op == 3) {
4584                            /* Accumulate.  */
4585                            neon_load_reg64(cpu_V0, rd + pass);
4586                            tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
4587                        } else if (op == 4 || (op == 5 && u)) {
4588                            /* Insert */
4589                            cpu_abort(env, "VS[LR]I.64 not implemented");
4590                        }
4591                        neon_store_reg64(cpu_V0, rd + pass);
4592                    } else { /* size < 3 */
4593                        /* Operands in T0 and T1.  */
4594                        gen_op_movl_T1_im(imm);
4595                        NEON_GET_REG(T0, rm, pass);
4596                        switch (op) {
4597                        case 0:  /* VSHR */
4598                        case 1:  /* VSRA */
4599                            GEN_NEON_INTEGER_OP(shl);
4600                            break;
4601                        case 2: /* VRSHR */
4602                        case 3: /* VRSRA */
4603                            GEN_NEON_INTEGER_OP(rshl);
4604                            break;
4605                        case 4: /* VSRI */
4606                            if (!u)
4607                                return 1;
4608                            GEN_NEON_INTEGER_OP(shl);
4609                            break;
4610                        case 5: /* VSHL, VSLI */
4611                            switch (size) {
4612                            case 0: gen_helper_neon_shl_u8(CPU_T001); break;
4613                            case 1: gen_helper_neon_shl_u16(CPU_T001); break;
4614                            case 2: gen_helper_neon_shl_u32(CPU_T001); break;
4615                            default: return 1;
4616                            }
4617                            break;
4618                        case 6: /* VQSHL */
4619                            GEN_NEON_INTEGER_OP_ENV(qshl);
4620                            break;
4621                        case 7: /* VQSHLU */
4622                            switch (size) {
4623                            case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;
4624                            case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;
4625                            case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break;
4626                            default: return 1;
4627                            }
4628                            break;
4629                        }
4630
4631                        if (op == 1 || op == 3) {
4632                            /* Accumulate.  */
4633                            NEON_GET_REG(T1, rd, pass);
4634                            gen_neon_add(size);
                        } else if (op == 4 || (op == 5 && u)) {
                            /* VSRI/VSLI insert: build the mask of bits that
                               the shifted value writes into the destination.
                               For VSRI (op == 4) the shift count is negative
                               at this point (right shifts are stored
                               negated), hence the ">> -shift" forms.  */
                            switch (size) {
                            case 0:
                                if (op == 4)
                                    imm = 0xff >> -shift;
                                else
                                    imm = (uint8_t)(0xff << shift);
                                /* Replicate the byte mask to all four bytes.  */
                                imm |= imm << 8;
                                imm |= imm << 16;
                                break;
                            case 1:
                                if (op == 4)
                                    imm = 0xffff >> -shift;
                                else
                                    imm = (uint16_t)(0xffff << shift);
                                /* Replicate the halfword mask to both halves.  */
                                imm |= imm << 16;
                                break;
                            case 2:
                                if (op == 4)
                                    imm = 0xffffffffu >> -shift;
                                else
                                    imm = 0xffffffffu << shift;
                                break;
                            default:
                                abort();
                            }
                            /* result = (shifted & mask) | (old_rd & ~mask) */
                            tmp = neon_load_reg(rd, pass);
                            tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
                            tcg_gen_andi_i32(tmp, tmp, ~imm);
                            tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
                        }
4667                        NEON_SET_REG(T0, rd, pass);
4668                    }
4669                } /* for pass */
4670            } else if (op < 10) {
4671                /* Shift by immediate and narrow:
4672                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
4673                shift = shift - (1 << (size + 3));
4674                size++;
4675                switch (size) {
4676                case 1:
4677                    imm = (uint16_t)shift;
4678                    imm |= imm << 16;
4679                    tmp2 = tcg_const_i32(imm);
4680                    TCGV_UNUSED_I64(tmp64);
4681                    break;
4682                case 2:
4683                    imm = (uint32_t)shift;
4684                    tmp2 = tcg_const_i32(imm);
4685                    TCGV_UNUSED_I64(tmp64);
4686                    break;
4687                case 3:
4688                    tmp64 = tcg_const_i64(shift);
4689                    TCGV_UNUSED(tmp2);
4690                    break;
4691                default:
4692                    abort();
4693                }
4694
4695                for (pass = 0; pass < 2; pass++) {
4696                    if (size == 3) {
4697                        neon_load_reg64(cpu_V0, rm + pass);
4698                        if (q) {
4699                          if (u)
4700                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
4701                          else
4702                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
4703                        } else {
4704                          if (u)
4705                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
4706                          else
4707                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
4708                        }
4709                    } else {
4710                        tmp = neon_load_reg(rm + pass, 0);
4711                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
4712                        tmp3 = neon_load_reg(rm + pass, 1);
4713                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
4714                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
4715                        dead_tmp(tmp);
4716                        dead_tmp(tmp3);
4717                    }
4718                    tmp = new_tmp();
4719                    if (op == 8 && !u) {
4720                        gen_neon_narrow(size - 1, tmp, cpu_V0);
4721                    } else {
4722                        if (op == 8)
4723                            gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
4724                        else
4725                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
4726                    }
4727                    if (pass == 0) {
4728                        tmp2 = tmp;
4729                    } else {
4730                        neon_store_reg(rd, 0, tmp2);
4731                        neon_store_reg(rd, 1, tmp);
4732                    }
4733                } /* for pass */
4734            } else if (op == 10) {
4735                /* VSHLL */
4736                if (q || size == 3)
4737                    return 1;
4738                tmp = neon_load_reg(rm, 0);
4739                tmp2 = neon_load_reg(rm, 1);
4740                for (pass = 0; pass < 2; pass++) {
4741                    if (pass == 1)
4742                        tmp = tmp2;
4743
4744                    gen_neon_widen(cpu_V0, tmp, size, u);
4745
4746                    if (shift != 0) {
4747                        /* The shift is less than the width of the source
4748                           type, so we can just shift the whole register.  */
4749                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
4750                        if (size < 2 || !u) {
4751                            uint64_t imm64;
4752                            if (size == 0) {
4753                                imm = (0xffu >> (8 - shift));
4754                                imm |= imm << 16;
4755                            } else {
4756                                imm = 0xffff >> (16 - shift);
4757                            }
4758                            imm64 = imm | (((uint64_t)imm) << 32);
4759                            tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
4760                        }
4761                    }
4762                    neon_store_reg64(cpu_V0, rd + pass);
4763                }
4764            } else if (op == 15 || op == 16) {
4765                /* VCVT fixed-point.  */
4766                for (pass = 0; pass < (q ? 4 : 2); pass++) {
4767                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
4768                    if (op & 1) {
4769                        if (u)
4770                            gen_vfp_ulto(0, shift);
4771                        else
4772                            gen_vfp_slto(0, shift);
4773                    } else {
4774                        if (u)
4775                            gen_vfp_toul(0, shift);
4776                        else
4777                            gen_vfp_tosl(0, shift);
4778                    }
4779                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
4780                }
4781            } else {
4782                return 1;
4783            }
4784        } else { /* (insn & 0x00380080) == 0 */
4785            int invert;
4786
4787            op = (insn >> 8) & 0xf;
4788            /* One register and immediate.  */
4789            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
4790            invert = (insn & (1 << 5)) != 0;
4791            switch (op) {
4792            case 0: case 1:
4793                /* no-op */
4794                break;
4795            case 2: case 3:
4796                imm <<= 8;
4797                break;
4798            case 4: case 5:
4799                imm <<= 16;
4800                break;
4801            case 6: case 7:
4802                imm <<= 24;
4803                break;
4804            case 8: case 9:
4805                imm |= imm << 16;
4806                break;
4807            case 10: case 11:
4808                imm = (imm << 8) | (imm << 24);
4809                break;
4810            case 12:
4811                imm = (imm < 8) | 0xff;
4812                break;
4813            case 13:
4814                imm = (imm << 16) | 0xffff;
4815                break;
4816            case 14:
4817                imm |= (imm << 8) | (imm << 16) | (imm << 24);
4818                if (invert)
4819                    imm = ~imm;
4820                break;
4821            case 15:
4822                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
4823                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
4824                break;
4825            }
4826            if (invert)
4827                imm = ~imm;
4828
4829            if (op != 14 || !invert)
4830                gen_op_movl_T1_im(imm);
4831
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                if (op & 1 && op < 12) {
                    /* VORR/VBIC (immediate): read-modify-write each word
                       of the destination.  */
                    tmp = neon_load_reg(rd, pass);
                    if (invert) {
                        /* The immediate value has already been inverted, so
                           BIC becomes AND.  */
                        tcg_gen_andi_i32(tmp, tmp, imm);
                    } else {
                        tcg_gen_ori_i32(tmp, tmp, imm);
                    }
                } else {
                    /* VMOV, VMVN.  */
                    tmp = new_tmp();
                    if (op == 14 && invert) {
                        /* Per-bit byte select: each bit of the 8-bit pattern
                           expands to 0xff or 0x00 in the corresponding byte;
                           odd passes consume the pattern's high nibble.  */
                        uint32_t val;
                        val = 0;
                        for (n = 0; n < 4; n++) {
                            if (imm & (1 << (n + (pass & 1) * 4)))
                                val |= 0xff << (n * 8);
                        }
                        tcg_gen_movi_i32(tmp, val);
                    } else {
                        tcg_gen_movi_i32(tmp, imm);
                    }
                }
                neon_store_reg(rd, pass, tmp);
            }
4858            }
4859        }
4860    } else { /* (insn & 0x00800010 == 0x00800000) */
4861        if (size != 3) {
4862            op = (insn >> 8) & 0xf;
4863            if ((insn & (1 << 6)) == 0) {
4864                /* Three registers of different lengths.  */
4865                int src1_wide;
4866                int src2_wide;
4867                int prewiden;
4868                /* prewiden, src1_wide, src2_wide */
4869                static const int neon_3reg_wide[16][3] = {
4870                    {1, 0, 0}, /* VADDL */
4871                    {1, 1, 0}, /* VADDW */
4872                    {1, 0, 0}, /* VSUBL */
4873                    {1, 1, 0}, /* VSUBW */
4874                    {0, 1, 1}, /* VADDHN */
4875                    {0, 0, 0}, /* VABAL */
4876                    {0, 1, 1}, /* VSUBHN */
4877                    {0, 0, 0}, /* VABDL */
4878                    {0, 0, 0}, /* VMLAL */
4879                    {0, 0, 0}, /* VQDMLAL */
4880                    {0, 0, 0}, /* VMLSL */
4881                    {0, 0, 0}, /* VQDMLSL */
4882                    {0, 0, 0}, /* Integer VMULL */
4883                    {0, 0, 0}, /* VQDMULL */
4884                    {0, 0, 0}  /* Polynomial VMULL */
4885                };
4886
4887                prewiden = neon_3reg_wide[op][0];
4888                src1_wide = neon_3reg_wide[op][1];
4889                src2_wide = neon_3reg_wide[op][2];
4890
4891                if (size == 0 && (op == 9 || op == 11 || op == 13))
4892                    return 1;
4893
4894                /* Avoid overlapping operands.  Wide source operands are
4895                   always aligned so will never overlap with wide
4896                   destinations in problematic ways.  */
4897                if (rd == rm && !src2_wide) {
4898                    NEON_GET_REG(T0, rm, 1);
4899                    gen_neon_movl_scratch_T0(2);
4900                } else if (rd == rn && !src1_wide) {
4901                    NEON_GET_REG(T0, rn, 1);
4902                    gen_neon_movl_scratch_T0(2);
4903                }
4904                TCGV_UNUSED(tmp3);
4905                for (pass = 0; pass < 2; pass++) {
4906                    if (src1_wide) {
4907                        neon_load_reg64(cpu_V0, rn + pass);
4908                        TCGV_UNUSED(tmp);
4909                    } else {
4910                        if (pass == 1 && rd == rn) {
4911                            gen_neon_movl_T0_scratch(2);
4912                            tmp = new_tmp();
4913                            tcg_gen_mov_i32(tmp, cpu_T[0]);
4914                        } else {
4915                            tmp = neon_load_reg(rn, pass);
4916                        }
4917                        if (prewiden) {
4918                            gen_neon_widen(cpu_V0, tmp, size, u);
4919                        }
4920                    }
4921                    if (src2_wide) {
4922                        neon_load_reg64(cpu_V1, rm + pass);
4923                        TCGV_UNUSED(tmp2);
4924                    } else {
4925                        if (pass == 1 && rd == rm) {
4926                            gen_neon_movl_T0_scratch(2);
4927                            tmp2 = new_tmp();
4928                            tcg_gen_mov_i32(tmp2, cpu_T[0]);
4929                        } else {
4930                            tmp2 = neon_load_reg(rm, pass);
4931                        }
4932                        if (prewiden) {
4933                            gen_neon_widen(cpu_V1, tmp2, size, u);
4934                        }
4935                    }
4936                    switch (op) {
4937                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
4938                        gen_neon_addl(size);
4939                        break;
4940                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */
4941                        gen_neon_subl(size);
4942                        break;
4943                    case 5: case 7: /* VABAL, VABDL */
4944                        switch ((size << 1) | u) {
4945                        case 0:
4946                            gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
4947                            break;
4948                        case 1:
4949                            gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
4950                            break;
4951                        case 2:
4952                            gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
4953                            break;
4954                        case 3:
4955                            gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
4956                            break;
4957                        case 4:
4958                            gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
4959                            break;
4960                        case 5:
4961                            gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
4962                            break;
4963                        default: abort();
4964                        }
4965                        dead_tmp(tmp2);
4966                        dead_tmp(tmp);
4967                        break;
4968                    case 8: case 9: case 10: case 11: case 12: case 13:
4969                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
4970                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
4971                        break;
4972                    case 14: /* Polynomial VMULL */
4973                        cpu_abort(env, "Polynomial VMULL not implemented");
4974
4975                    default: /* 15 is RESERVED.  */
4976                        return 1;
4977                    }
4978                    if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
4979                        /* Accumulate.  */
4980                        if (op == 10 || op == 11) {
4981                            gen_neon_negl(cpu_V0, size);
4982                        }
4983
                        if (op != 13) {
                            /* Every op here except VQDMULL accumulates into
                               the destination, so load it into V1.  */
                            neon_load_reg64(cpu_V1, rd + pass);
                        }

                        switch (op) {
                        case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
                            gen_neon_addl(size);
                            break;
                        case 9: case 11: /* VQDMLAL, VQDMLSL */
                            /* Saturating double, then saturating accumulate.  */
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
                            break;
                        case 13: /* VQDMULL: saturating double, no accumulate.  */
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            break;
                        default:
                            abort();
                        }
                        neon_store_reg64(cpu_V0, rd + pass);
5004                    } else if (op == 4 || op == 6) {
5005                        /* Narrowing operation.  */
5006                        tmp = new_tmp();
5007                        if (u) {
5008                            switch (size) {
5009                            case 0:
5010                                gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5011                                break;
5012                            case 1:
5013                                gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5014                                break;
5015                            case 2:
5016                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5017                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5018                                break;
5019                            default: abort();
5020                            }
5021                        } else {
5022                            switch (size) {
5023                            case 0:
5024                                gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5025                                break;
5026                            case 1:
5027                                gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5028                                break;
5029                            case 2:
5030                                tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5031                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5032                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5033                                break;
5034                            default: abort();
5035                            }
5036                        }
5037                        if (pass == 0) {
5038                            tmp3 = tmp;
5039                        } else {
5040                            neon_store_reg(rd, 0, tmp3);
5041                            neon_store_reg(rd, 1, tmp);
5042                        }
5043                    } else {
5044                        /* Write back the result.  */
5045                        neon_store_reg64(cpu_V0, rd + pass);
5046                    }
5047                }
5048            } else {
5049                /* Two registers and a scalar.  */
5050                switch (op) {
5051                case 0: /* Integer VMLA scalar */
5052                case 1: /* Float VMLA scalar */
5053                case 4: /* Integer VMLS scalar */
5054                case 5: /* Floating point VMLS scalar */
5055                case 8: /* Integer VMUL scalar */
5056                case 9: /* Floating point VMUL scalar */
5057                case 12: /* VQDMULH scalar */
5058                case 13: /* VQRDMULH scalar */
5059                    gen_neon_get_scalar(size, rm);
5060                    gen_neon_movl_scratch_T0(0);
5061                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
5062                        if (pass != 0)
5063                            gen_neon_movl_T0_scratch(0);
5064                        NEON_GET_REG(T1, rn, pass);
5065                        if (op == 12) {
5066                            if (size == 1) {
5067                                gen_helper_neon_qdmulh_s16(CPU_T0E01);
5068                            } else {
5069                                gen_helper_neon_qdmulh_s32(CPU_T0E01);
5070                            }
5071                        } else if (op == 13) {
5072                            if (size == 1) {
5073                                gen_helper_neon_qrdmulh_s16(CPU_T0E01);
5074                            } else {
5075                                gen_helper_neon_qrdmulh_s32(CPU_T0E01);
5076                            }
5077                        } else if (op & 1) {
5078                            gen_helper_neon_mul_f32(CPU_T001);
5079                        } else {
5080                            switch (size) {
5081                            case 0: gen_helper_neon_mul_u8(CPU_T001); break;
5082                            case 1: gen_helper_neon_mul_u16(CPU_T001); break;
5083                            case 2: gen_op_mul_T0_T1(); break;
5084                            default: return 1;
5085                            }
5086                        }
5087                        if (op < 8) {
5088                            /* Accumulate.  */
5089                            NEON_GET_REG(T1, rd, pass);
5090                            switch (op) {
5091                            case 0:
5092                                gen_neon_add(size);
5093                                break;
5094                            case 1:
5095                                gen_helper_neon_add_f32(CPU_T001);
5096                                break;
5097                            case 4:
5098                                gen_neon_rsb(size);
5099                                break;
5100                            case 5:
5101                                gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
5102                                break;
5103                            default:
5104                                abort();
5105                            }
5106                        }
5107                        NEON_SET_REG(T0, rd, pass);
5108                    }
5109                    break;
                case 2: /* VMLAL scalar */
                case 3: /* VQDMLAL scalar */
                case 6: /* VMLSL scalar */
                case 7: /* VQDMLSL scalar */
                case 10: /* VMULL scalar */
                case 11: /* VQDMULL scalar */
                    /* The saturating-doubling forms have no 8-bit variant.  */
                    if (size == 0 && (op == 3 || op == 7 || op == 11))
                        return 1;

                    /* Scalar operand into T0; cache the second half of rn in
                       T1 now, since pass 0 below writes rd before pass 1
                       reads it (rd may overlap rn).  */
                    gen_neon_get_scalar(size, rm);
                    NEON_GET_REG(T1, rn, 1);
5121
5122                    for (pass = 0; pass < 2; pass++) {
5123                        if (pass == 0) {
5124                            tmp = neon_load_reg(rn, 0);
5125                        } else {
5126                            tmp = new_tmp();
5127                            tcg_gen_mov_i32(tmp, cpu_T[1]);
5128                        }
5129                        tmp2 = new_tmp();
5130                        tcg_gen_mov_i32(tmp2, cpu_T[0]);
5131                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5132                        if (op == 6 || op == 7) {
5133                            gen_neon_negl(cpu_V0, size);
5134                        }
5135                        if (op != 11) {
5136                            neon_load_reg64(cpu_V1, rd + pass);
5137                        }
5138                        switch (op) {
5139                        case 2: case 6:
5140                            gen_neon_addl(size);
5141                            break;
5142                        case 3: case 7:
5143                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5144                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5145                            break;
5146                        case 10:
5147                            /* no-op */
5148                            break;
5149                        case 11:
5150                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5151                            break;
5152                        default:
5153                            abort();
5154                        }
5155                        neon_store_reg64(cpu_V0, rd + pass);
5156                    }
5157                    break;
5158                default: /* 14 and 15 are RESERVED */
5159                    return 1;
5160                }
5161            }
5162        } else { /* size == 3 */
5163            if (!u) {
5164                /* Extract.  */
5165                imm = (insn >> 8) & 0xf;
5166                count = q + 1;
5167
5168                if (imm > 7 && !q)
5169                    return 1;
5170
5171                if (imm == 0) {
5172                    neon_load_reg64(cpu_V0, rn);
5173                    if (q) {
5174                        neon_load_reg64(cpu_V1, rn + 1);
5175                    }
5176                } else if (imm == 8) {
5177                    neon_load_reg64(cpu_V0, rn + 1);
5178                    if (q) {
5179                        neon_load_reg64(cpu_V1, rm);
5180                    }
5181                } else if (q) {
5182                    tmp64 = tcg_temp_new_i64();
5183                    if (imm < 8) {
5184                        neon_load_reg64(cpu_V0, rn);
5185                        neon_load_reg64(tmp64, rn + 1);
5186                    } else {
5187                        neon_load_reg64(cpu_V0, rn + 1);
5188                        neon_load_reg64(tmp64, rm);
5189                    }
5190                    tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5191                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5192                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5193                    if (imm < 8) {
5194                        neon_load_reg64(cpu_V1, rm);
5195                    } else {
5196                        neon_load_reg64(cpu_V1, rm + 1);
5197                        imm -= 8;
5198                    }
5199                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5200                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5201                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5202                } else {
5203                    /* BUGFIX */
5204                    neon_load_reg64(cpu_V0, rn);
5205                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5206                    neon_load_reg64(cpu_V1, rm);
5207                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5208                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5209                }
5210                neon_store_reg64(cpu_V0, rd);
5211                if (q) {
5212                    neon_store_reg64(cpu_V1, rd + 1);
5213                }
5214            } else if ((insn & (1 << 11)) == 0) {
5215                /* Two register misc.  */
5216                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5217                size = (insn >> 18) & 3;
5218                switch (op) {
5219                case 0: /* VREV64 */
5220                    if (size == 3)
5221                        return 1;
5222                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
5223                        NEON_GET_REG(T0, rm, pass * 2);
5224                        NEON_GET_REG(T1, rm, pass * 2 + 1);
5225                        switch (size) {
5226                        case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5227                        case 1: gen_swap_half(cpu_T[0]); break;
5228                        case 2: /* no-op */ break;
5229                        default: abort();
5230                        }
5231                        NEON_SET_REG(T0, rd, pass * 2 + 1);
5232                        if (size == 2) {
5233                            NEON_SET_REG(T1, rd, pass * 2);
5234                        } else {
5235                            gen_op_movl_T0_T1();
5236                            switch (size) {
5237                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5238                            case 1: gen_swap_half(cpu_T[0]); break;
5239                            default: abort();
5240                            }
5241                            NEON_SET_REG(T0, rd, pass * 2);
5242                        }
5243                    }
5244                    break;
5245                case 4: case 5: /* VPADDL */
5246                case 12: case 13: /* VPADAL */
5247                    if (size == 3)
5248                        return 1;
5249                    for (pass = 0; pass < q + 1; pass++) {
5250                        tmp = neon_load_reg(rm, pass * 2);
5251                        gen_neon_widen(cpu_V0, tmp, size, op & 1);
5252                        tmp = neon_load_reg(rm, pass * 2 + 1);
5253                        gen_neon_widen(cpu_V1, tmp, size, op & 1);
5254                        switch (size) {
5255                        case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5256                        case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5257                        case 2: tcg_gen_add_i64(CPU_V001); break;
5258                        default: abort();
5259                        }
5260                        if (op >= 12) {
5261                            /* Accumulate.  */
5262                            neon_load_reg64(cpu_V1, rd + pass);
5263                            gen_neon_addl(size);
5264                        }
5265                        neon_store_reg64(cpu_V0, rd + pass);
5266                    }
5267                    break;
5268                case 33: /* VTRN */
5269                    if (size == 2) {
5270                        for (n = 0; n < (q ? 4 : 2); n += 2) {
5271                            NEON_GET_REG(T0, rm, n);
5272                            NEON_GET_REG(T1, rd, n + 1);
5273                            NEON_SET_REG(T1, rm, n);
5274                            NEON_SET_REG(T0, rd, n + 1);
5275                        }
5276                    } else {
5277                        goto elementwise;
5278                    }
5279                    break;
5280                case 34: /* VUZP */
5281                    /* Reg  Before       After
5282                       Rd   A3 A2 A1 A0  B2 B0 A2 A0
5283                       Rm   B3 B2 B1 B0  B3 B1 A3 A1
5284                     */
5285                    if (size == 3)
5286                        return 1;
5287                    gen_neon_unzip(rd, q, 0, size);
5288                    gen_neon_unzip(rm, q, 4, size);
5289                    if (q) {
5290                        static int unzip_order_q[8] =
5291                            {0, 2, 4, 6, 1, 3, 5, 7};
5292                        for (n = 0; n < 8; n++) {
5293                            int reg = (n < 4) ? rd : rm;
5294                            gen_neon_movl_T0_scratch(unzip_order_q[n]);
5295                            NEON_SET_REG(T0, reg, n % 4);
5296                        }
5297                    } else {
5298                        static int unzip_order[4] =
5299                            {0, 4, 1, 5};
5300                        for (n = 0; n < 4; n++) {
5301                            int reg = (n < 2) ? rd : rm;
5302                            gen_neon_movl_T0_scratch(unzip_order[n]);
5303                            NEON_SET_REG(T0, reg, n % 2);
5304                        }
5305                    }
5306                    break;
5307                case 35: /* VZIP */
5308                    /* Reg  Before       After
5309                       Rd   A3 A2 A1 A0  B1 A1 B0 A0
5310                       Rm   B3 B2 B1 B0  B3 A3 B2 A2
5311                     */
5312                    if (size == 3)
5313                        return 1;
5314                    count = (q ? 4 : 2);
5315                    for (n = 0; n < count; n++) {
5316                        NEON_GET_REG(T0, rd, n);
5317                        NEON_GET_REG(T1, rd, n);
5318                        switch (size) {
5319                        case 0: gen_helper_neon_zip_u8(); break;
5320                        case 1: gen_helper_neon_zip_u16(); break;
5321                        case 2: /* no-op */; break;
5322                        default: abort();
5323                        }
5324                        gen_neon_movl_scratch_T0(n * 2);
5325                        gen_neon_movl_scratch_T1(n * 2 + 1);
5326                    }
5327                    for (n = 0; n < count * 2; n++) {
5328                        int reg = (n < count) ? rd : rm;
5329                        gen_neon_movl_T0_scratch(n);
5330                        NEON_SET_REG(T0, reg, n % count);
5331                    }
5332                    break;
5333                case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
5334                    if (size == 3)
5335                        return 1;
5336                    TCGV_UNUSED(tmp2);
5337                    for (pass = 0; pass < 2; pass++) {
5338                        neon_load_reg64(cpu_V0, rm + pass);
5339                        tmp = new_tmp();
5340                        if (op == 36 && q == 0) {
5341                            gen_neon_narrow(size, tmp, cpu_V0);
5342                        } else if (q) {
5343                            gen_neon_narrow_satu(size, tmp, cpu_V0);
5344                        } else {
5345                            gen_neon_narrow_sats(size, tmp, cpu_V0);
5346                        }
5347                        if (pass == 0) {
5348                            tmp2 = tmp;
5349                        } else {
5350                            neon_store_reg(rd, 0, tmp2);
5351                            neon_store_reg(rd, 1, tmp);
5352                        }
5353                    }
5354                    break;
5355                case 38: /* VSHLL */
5356                    if (q || size == 3)
5357                        return 1;
5358                    tmp = neon_load_reg(rm, 0);
5359                    tmp2 = neon_load_reg(rm, 1);
5360                    for (pass = 0; pass < 2; pass++) {
5361                        if (pass == 1)
5362                            tmp = tmp2;
5363                        gen_neon_widen(cpu_V0, tmp, size, 1);
5364                        neon_store_reg64(cpu_V0, rd + pass);
5365                    }
5366                    break;
5367                default:
5368                elementwise:
5369                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
5370                        if (op == 30 || op == 31 || op >= 58) {
5371                            tcg_gen_ld_f32(cpu_F0s, cpu_env,
5372                                           neon_reg_offset(rm, pass));
5373                        } else {
5374                            NEON_GET_REG(T0, rm, pass);
5375                        }
5376                        switch (op) {
5377                        case 1: /* VREV32 */
5378                            switch (size) {
5379                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5380                            case 1: gen_swap_half(cpu_T[0]); break;
5381                            default: return 1;
5382                            }
5383                            break;
5384                        case 2: /* VREV16 */
5385                            if (size != 0)
5386                                return 1;
5387                            gen_rev16(cpu_T[0]);
5388                            break;
5389                        case 8: /* CLS */
5390                            switch (size) {
5391                            case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;
5392                            case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;
5393                            case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break;
5394                            default: return 1;
5395                            }
5396                            break;
5397                        case 9: /* CLZ */
5398                            switch (size) {
5399                            case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;
5400                            case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;
5401                            case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break;
5402                            default: return 1;
5403                            }
5404                            break;
5405                        case 10: /* CNT */
5406                            if (size != 0)
5407                                return 1;
5408                            gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]);
5409                            break;
5410                        case 11: /* VNOT */
5411                            if (size != 0)
5412                                return 1;
5413                            gen_op_notl_T0();
5414                            break;
5415                        case 14: /* VQABS */
5416                            switch (size) {
5417                            case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5418                            case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5419                            case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5420                            default: return 1;
5421                            }
5422                            break;
5423                        case 15: /* VQNEG */
5424                            switch (size) {
5425                            case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5426                            case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5427                            case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5428                            default: return 1;
5429                            }
5430                            break;
5431                        case 16: case 19: /* VCGT #0, VCLE #0 */
5432                            gen_op_movl_T1_im(0);
5433                            switch(size) {
5434                            case 0: gen_helper_neon_cgt_s8(CPU_T001); break;
5435                            case 1: gen_helper_neon_cgt_s16(CPU_T001); break;
5436                            case 2: gen_helper_neon_cgt_s32(CPU_T001); break;
5437                            default: return 1;
5438                            }
5439                            if (op == 19)
5440                                gen_op_notl_T0();
5441                            break;
5442                        case 17: case 20: /* VCGE #0, VCLT #0 */
5443                            gen_op_movl_T1_im(0);
5444                            switch(size) {
5445                            case 0: gen_helper_neon_cge_s8(CPU_T001); break;
5446                            case 1: gen_helper_neon_cge_s16(CPU_T001); break;
5447                            case 2: gen_helper_neon_cge_s32(CPU_T001); break;
5448                            default: return 1;
5449                            }
5450                            if (op == 20)
5451                                gen_op_notl_T0();
5452                            break;
5453                        case 18: /* VCEQ #0 */
5454                            gen_op_movl_T1_im(0);
5455                            switch(size) {
5456                            case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
5457                            case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
5458                            case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
5459                            default: return 1;
5460                            }
5461                            break;
5462                        case 22: /* VABS */
5463                            switch(size) {
5464                            case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;
5465                            case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;
5466                            case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break;
5467                            default: return 1;
5468                            }
5469                            break;
5470                        case 23: /* VNEG */
5471                            gen_op_movl_T1_im(0);
5472                            if (size == 3)
5473                                return 1;
5474                            gen_neon_rsb(size);
5475                            break;
5476                        case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
5477                            gen_op_movl_T1_im(0);
5478                            gen_helper_neon_cgt_f32(CPU_T001);
5479                            if (op == 27)
5480                                gen_op_notl_T0();
5481                            break;
5482                        case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
5483                            gen_op_movl_T1_im(0);
5484                            gen_helper_neon_cge_f32(CPU_T001);
5485                            if (op == 28)
5486                                gen_op_notl_T0();
5487                            break;
5488                        case 26: /* Float VCEQ #0 */
5489                            gen_op_movl_T1_im(0);
5490                            gen_helper_neon_ceq_f32(CPU_T001);
5491                            break;
5492                        case 30: /* Float VABS */
5493                            gen_vfp_abs(0);
5494                            break;
5495                        case 31: /* Float VNEG */
5496                            gen_vfp_neg(0);
5497                            break;
5498                        case 32: /* VSWP */
5499                            NEON_GET_REG(T1, rd, pass);
5500                            NEON_SET_REG(T1, rm, pass);
5501                            break;
5502                        case 33: /* VTRN */
5503                            NEON_GET_REG(T1, rd, pass);
5504                            switch (size) {
5505                            case 0: gen_helper_neon_trn_u8(); break;
5506                            case 1: gen_helper_neon_trn_u16(); break;
5507                            case 2: abort();
5508                            default: return 1;
5509                            }
5510                            NEON_SET_REG(T1, rm, pass);
5511                            break;
5512                        case 56: /* Integer VRECPE */
5513                            gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env);
5514                            break;
5515                        case 57: /* Integer VRSQRTE */
5516                            gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env);
5517                            break;
5518                        case 58: /* Float VRECPE */
5519                            gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
5520                            break;
5521                        case 59: /* Float VRSQRTE */
5522                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
5523                            break;
5524                        case 60: /* VCVT.F32.S32 */
5525                            gen_vfp_tosiz(0);
5526                            break;
5527                        case 61: /* VCVT.F32.U32 */
5528                            gen_vfp_touiz(0);
5529                            break;
5530                        case 62: /* VCVT.S32.F32 */
5531                            gen_vfp_sito(0);
5532                            break;
5533                        case 63: /* VCVT.U32.F32 */
5534                            gen_vfp_uito(0);
5535                            break;
5536                        default:
5537                            /* Reserved: 21, 29, 39-56 */
5538                            return 1;
5539                        }
5540                        if (op == 30 || op == 31 || op >= 58) {
5541                            tcg_gen_st_f32(cpu_F0s, cpu_env,
5542                                           neon_reg_offset(rd, pass));
5543                        } else {
5544                            NEON_SET_REG(T0, rd, pass);
5545                        }
5546                    }
5547                    break;
5548                }
5549            } else if ((insn & (1 << 10)) == 0) {
5550                /* VTBL, VTBX.  */
5551                n = ((insn >> 5) & 0x18) + 8;
5552                if (insn & (1 << 6)) {
5553                    tmp = neon_load_reg(rd, 0);
5554                } else {
5555                    tmp = new_tmp();
5556                    tcg_gen_movi_i32(tmp, 0);
5557                }
5558                tmp2 = neon_load_reg(rm, 0);
5559                gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
5560                                    tcg_const_i32(n));
5561                dead_tmp(tmp);
5562                if (insn & (1 << 6)) {
5563                    tmp = neon_load_reg(rd, 1);
5564                } else {
5565                    tmp = new_tmp();
5566                    tcg_gen_movi_i32(tmp, 0);
5567                }
5568                tmp3 = neon_load_reg(rm, 1);
5569                gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
5570                                    tcg_const_i32(n));
5571                neon_store_reg(rd, 0, tmp2);
5572                neon_store_reg(rd, 1, tmp3);
5573                dead_tmp(tmp);
5574            } else if ((insn & 0x380) == 0) {
5575                /* VDUP */
5576                if (insn & (1 << 19)) {
5577                    NEON_SET_REG(T0, rm, 1);
5578                } else {
5579                    NEON_SET_REG(T0, rm, 0);
5580                }
5581                if (insn & (1 << 16)) {
5582                    gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8);
5583                } else if (insn & (1 << 17)) {
5584                    if ((insn >> 18) & 1)
5585                        gen_neon_dup_high16(cpu_T[0]);
5586                    else
5587                        gen_neon_dup_low16(cpu_T[0]);
5588                }
5589                for (pass = 0; pass < (q ? 4 : 2); pass++) {
5590                    NEON_SET_REG(T0, rd, pass);
5591                }
5592            } else {
5593                return 1;
5594            }
5595        }
5596    }
5597    return 0;
5598}
5599
5600static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
5601{
5602    int crn = (insn >> 16) & 0xf;
5603    int crm = insn & 0xf;
5604    int op1 = (insn >> 21) & 7;
5605    int op2 = (insn >> 5) & 7;
5606    int rt = (insn >> 12) & 0xf;
5607    TCGv tmp;
5608
5609    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
5610        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
5611            /* TEECR */
5612            if (IS_USER(s))
5613                return 1;
5614            tmp = load_cpu_field(teecr);
5615            store_reg(s, rt, tmp);
5616            return 0;
5617        }
5618        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
5619            /* TEEHBR */
5620            if (IS_USER(s) && (env->teecr & 1))
5621                return 1;
5622            tmp = load_cpu_field(teehbr);
5623            store_reg(s, rt, tmp);
5624            return 0;
5625        }
5626    }
5627    fprintf(stderr, "Unknown cp14 read op1:%d crn:%d crm:%d op2:%d\n",
5628            op1, crn, crm, op2);
5629    return 1;
5630}
5631
5632static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
5633{
5634    int crn = (insn >> 16) & 0xf;
5635    int crm = insn & 0xf;
5636    int op1 = (insn >> 21) & 7;
5637    int op2 = (insn >> 5) & 7;
5638    int rt = (insn >> 12) & 0xf;
5639    TCGv tmp;
5640
5641    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
5642        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
5643            /* TEECR */
5644            if (IS_USER(s))
5645                return 1;
5646            tmp = load_reg(s, rt);
5647            gen_helper_set_teecr(cpu_env, tmp);
5648            dead_tmp(tmp);
5649            return 0;
5650        }
5651        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
5652            /* TEEHBR */
5653            if (IS_USER(s) && (env->teecr & 1))
5654                return 1;
5655            tmp = load_reg(s, rt);
5656            store_cpu_field(tmp, teehbr);
5657            return 0;
5658        }
5659    }
5660    fprintf(stderr, "Unknown cp14 write op1:%d crn:%d crm:%d op2:%d\n",
5661            op1, crn, crm, op2);
5662    return 1;
5663}
5664
5665static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
5666{
5667    int cpnum;
5668
5669    cpnum = (insn >> 8) & 0xf;
5670    if (arm_feature(env, ARM_FEATURE_XSCALE)
5671	    && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
5672	return 1;
5673
5674    switch (cpnum) {
5675      case 0:
5676      case 1:
5677	if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5678	    return disas_iwmmxt_insn(env, s, insn);
5679	} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
5680	    return disas_dsp_insn(env, s, insn);
5681	}
5682	return 1;
5683    case 10:
5684    case 11:
5685	return disas_vfp_insn (env, s, insn);
5686    case 14:
5687        /* Coprocessors 7-15 are architecturally reserved by ARM.
5688           Unfortunately Intel decided to ignore this.  */
5689        if (arm_feature(env, ARM_FEATURE_XSCALE))
5690            goto board;
5691        if (insn & (1 << 20))
5692            return disas_cp14_read(env, s, insn);
5693        else
5694            return disas_cp14_write(env, s, insn);
5695    case 15:
5696	return disas_cp15_insn (env, s, insn);
5697    default:
5698    board:
5699	/* Unknown coprocessor.  See if the board has hooked it.  */
5700	return disas_cp_insn (env, s, insn);
5701    }
5702}
5703
5704
5705/* Store a 64-bit value to a register pair.  Clobbers val.  */
5706static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
5707{
5708    TCGv tmp;
5709    tmp = new_tmp();
5710    tcg_gen_trunc_i64_i32(tmp, val);
5711    store_reg(s, rlow, tmp);
5712    tmp = new_tmp();
5713    tcg_gen_shri_i64(val, val, 32);
5714    tcg_gen_trunc_i64_i32(tmp, val);
5715    store_reg(s, rhigh, tmp);
5716}
5717
5718/* load a 32-bit value from a register and perform a 64-bit accumulate.  */
5719static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
5720{
5721    TCGv_i64 tmp;
5722    TCGv tmp2;
5723
5724    /* Load value and extend to 64 bits.  */
5725    tmp = tcg_temp_new_i64();
5726    tmp2 = load_reg(s, rlow);
5727    tcg_gen_extu_i32_i64(tmp, tmp2);
5728    dead_tmp(tmp2);
5729    tcg_gen_add_i64(val, val, tmp);
5730}
5731
5732/* load and add a 64-bit value from a register pair.  */
5733static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
5734{
5735    TCGv_i64 tmp;
5736    TCGv tmpl;
5737    TCGv tmph;
5738
5739    /* Load 64-bit value rd:rn.  */
5740    tmpl = load_reg(s, rlow);
5741    tmph = load_reg(s, rhigh);
5742    tmp = tcg_temp_new_i64();
5743    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5744    dead_tmp(tmpl);
5745    dead_tmp(tmph);
5746    tcg_gen_add_i64(val, val, tmp);
5747}
5748
5749/* Set N and Z flags from a 64-bit value.  */
5750static void gen_logicq_cc(TCGv_i64 val)
5751{
5752    TCGv tmp = new_tmp();
5753    gen_helper_logicq_cc(tmp, val);
5754    gen_logic_CC(tmp);
5755    dead_tmp(tmp);
5756}
5757
5758
5759#ifdef CONFIG_TRACE
5760
5761#define  gen_traceInsn()   gen_helper_traceInsn()
5762
5763static void
5764gen_traceTicks( int  count )
5765{
5766    TCGv  tmp = tcg_temp_new_i32();
5767    tcg_gen_movi_i32(tmp, count);
5768    gen_helper_traceTicks(tmp);
5769    tcg_temp_free_i32(tmp);
5770}
5771
/* Emit code reporting execution of basic block 'bbNum' (translation
   block pointer 'tb') to the tracer.  The pointer argument's width, and
   hence the helper called, depends on the host's long size.  */
static void
gen_traceBB( uint64_t  bbNum, void* tb )
{
#if HOST_LONG_BITS == 32
    TCGv_i64  numArg = tcg_temp_new_i64();
    TCGv_i32  tbArg  = tcg_temp_new_i32();

    tcg_gen_movi_i64(numArg, (int64_t)bbNum);
    tcg_gen_movi_i32(tbArg,  (int32_t)tb);
    gen_helper_traceBB32(numArg, tbArg);
    tcg_temp_free_i32(tbArg);
    tcg_temp_free_i64(numArg);
#elif HOST_LONG_BITS == 64
    TCGv_i64  numArg = tcg_temp_new_i64();
    TCGv_i64  tbArg  = tcg_temp_new_i64();

    tcg_gen_movi_i64(numArg, (int64_t)bbNum);
    tcg_gen_movi_i64(tbArg,  (int64_t)tb);
    gen_helper_traceBB64(numArg, tbArg);
    tcg_temp_free_i64(tbArg);
    tcg_temp_free_i64(numArg);
#endif
}
5795#endif /* CONFIG_TRACE */
5796
5797static void disas_arm_insn(CPUState * env, DisasContext *s)
5798{
5799    unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
5800#ifdef CONFIG_TRACE
5801    int  ticks = 0;
5802#endif
5803    TCGv tmp;
5804    TCGv tmp2;
5805    TCGv tmp3;
5806    TCGv addr;
5807    TCGv_i64 tmp64;
5808    insn = ldl_code(s->pc);
5809
5810#ifdef CONFIG_MEMCHECK
5811    if (watch_call_stack(s)) {
5812        if (is_ret_address(env, s->pc)) {
5813            set_on_ret(s->pc);
5814        }
5815        if (is_arm_bl_or_blx(insn)) {
5816            set_on_call(s->pc, s->pc + 4);
5817            if (!s->search_pc) {
5818                register_ret_address(env, s->pc + 4);
5819            }
5820        }
5821    }
5822#endif  // CONFIG_MEMCHECK
5823
5824#ifdef CONFIG_TRACE
5825    if (tracing) {
5826        trace_add_insn(insn, 0);
5827        ticks = get_insn_ticks_arm(insn);
5828        gen_traceInsn();
5829    }
5830#endif
5831
5832    s->pc += 4;
5833
5834    /* M variants do not implement ARM mode.  */
5835    if (IS_M(env))
5836        goto illegal_op;
5837    cond = insn >> 28;
5838    if (cond == 0xf){
5839#ifdef CONFIG_TRACE
5840        if (tracing) {
5841            gen_traceTicks(ticks);
5842        }
5843#endif
5844        /* Unconditional instructions.  */
5845        if (((insn >> 25) & 7) == 1) {
5846            /* NEON Data processing.  */
5847            if (!arm_feature(env, ARM_FEATURE_NEON))
5848                goto illegal_op;
5849
5850            if (disas_neon_data_insn(env, s, insn))
5851                goto illegal_op;
5852            return;
5853        }
5854        if ((insn & 0x0f100000) == 0x04000000) {
5855            /* NEON load/store.  */
5856            if (!arm_feature(env, ARM_FEATURE_NEON))
5857                goto illegal_op;
5858
5859            if (disas_neon_ls_insn(env, s, insn))
5860                goto illegal_op;
5861            return;
5862        }
5863        if ((insn & 0x0d70f000) == 0x0550f000)
5864            return; /* PLD */
5865        else if ((insn & 0x0ffffdff) == 0x01010000) {
5866            ARCH(6);
5867            /* setend */
5868            if (insn & (1 << 9)) {
5869                /* BE8 mode not implemented.  */
5870                goto illegal_op;
5871            }
5872            return;
5873        } else if ((insn & 0x0fffff00) == 0x057ff000) {
5874            switch ((insn >> 4) & 0xf) {
5875            case 1: /* clrex */
5876                ARCH(6K);
5877                gen_helper_clrex(cpu_env);
5878                return;
5879            case 4: /* dsb */
5880            case 5: /* dmb */
5881            case 6: /* isb */
5882                ARCH(7);
5883                /* We don't emulate caches so these are a no-op.  */
5884                return;
5885            default:
5886                goto illegal_op;
5887            }
5888        } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
5889            /* srs */
5890            uint32_t offset;
5891            if (IS_USER(s))
5892                goto illegal_op;
5893            ARCH(6);
5894            op1 = (insn & 0x1f);
5895            if (op1 == (env->uncached_cpsr & CPSR_M)) {
5896                addr = load_reg(s, 13);
5897            } else {
5898                addr = new_tmp();
5899                gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op1));
5900            }
5901            i = (insn >> 23) & 3;
5902            switch (i) {
5903            case 0: offset = -4; break; /* DA */
5904            case 1: offset = -8; break; /* DB */
5905            case 2: offset = 0; break; /* IA */
5906            case 3: offset = 4; break; /* IB */
5907            default: abort();
5908            }
5909            if (offset)
5910                tcg_gen_addi_i32(addr, addr, offset);
5911            tmp = load_reg(s, 14);
5912            gen_st32(tmp, addr, 0);
5913            tmp = new_tmp();
5914            gen_helper_cpsr_read(tmp);
5915            tcg_gen_addi_i32(addr, addr, 4);
5916            gen_st32(tmp, addr, 0);
5917            if (insn & (1 << 21)) {
5918                /* Base writeback.  */
5919                switch (i) {
5920                case 0: offset = -8; break;
5921                case 1: offset = -4; break;
5922                case 2: offset = 4; break;
5923                case 3: offset = 0; break;
5924                default: abort();
5925                }
5926                if (offset)
5927                    tcg_gen_addi_i32(addr, tmp, offset);
5928                if (op1 == (env->uncached_cpsr & CPSR_M)) {
5929                    gen_movl_reg_T1(s, 13);
5930                } else {
5931                    gen_helper_set_r13_banked(cpu_env, tcg_const_i32(op1), cpu_T[1]);
5932                }
5933            } else {
5934                dead_tmp(addr);
5935            }
5936        } else if ((insn & 0x0e5fffe0) == 0x081d0a00) {
5937            /* rfe */
5938            uint32_t offset;
5939            if (IS_USER(s))
5940                goto illegal_op;
5941            ARCH(6);
5942            rn = (insn >> 16) & 0xf;
5943            addr = load_reg(s, rn);
5944            i = (insn >> 23) & 3;
5945            switch (i) {
5946            case 0: offset = -4; break; /* DA */
5947            case 1: offset = -8; break; /* DB */
5948            case 2: offset = 0; break; /* IA */
5949            case 3: offset = 4; break; /* IB */
5950            default: abort();
5951            }
5952            if (offset)
5953                tcg_gen_addi_i32(addr, addr, offset);
5954            /* Load PC into tmp and CPSR into tmp2.  */
5955            tmp = gen_ld32(addr, 0);
5956            tcg_gen_addi_i32(addr, addr, 4);
5957            tmp2 = gen_ld32(addr, 0);
5958            if (insn & (1 << 21)) {
5959                /* Base writeback.  */
5960                switch (i) {
5961                case 0: offset = -8; break;
5962                case 1: offset = -4; break;
5963                case 2: offset = 4; break;
5964                case 3: offset = 0; break;
5965                default: abort();
5966                }
5967                if (offset)
5968                    tcg_gen_addi_i32(addr, addr, offset);
5969                store_reg(s, rn, addr);
5970            } else {
5971                dead_tmp(addr);
5972            }
5973            gen_rfe(s, tmp, tmp2);
5974        } else if ((insn & 0x0e000000) == 0x0a000000) {
5975            /* branch link and change to thumb (blx <offset>) */
5976            int32_t offset;
5977
5978            val = (uint32_t)s->pc;
5979            tmp = new_tmp();
5980            tcg_gen_movi_i32(tmp, val);
5981            store_reg(s, 14, tmp);
5982            /* Sign-extend the 24-bit offset */
5983            offset = (((int32_t)insn) << 8) >> 8;
5984            /* offset * 4 + bit24 * 2 + (thumb bit) */
5985            val += (offset << 2) | ((insn >> 23) & 2) | 1;
5986            /* pipeline offset */
5987            val += 4;
5988            gen_bx_im(s, val);
5989            return;
5990        } else if ((insn & 0x0e000f00) == 0x0c000100) {
5991            if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5992                /* iWMMXt register transfer.  */
5993                if (env->cp15.c15_cpar & (1 << 1))
5994                    if (!disas_iwmmxt_insn(env, s, insn))
5995                        return;
5996            }
5997        } else if ((insn & 0x0fe00000) == 0x0c400000) {
5998            /* Coprocessor double register transfer.  */
5999        } else if ((insn & 0x0f000010) == 0x0e000010) {
6000            /* Additional coprocessor register transfer.  */
6001        } else if ((insn & 0x0ff10020) == 0x01000000) {
6002            uint32_t mask;
6003            uint32_t val;
6004            /* cps (privileged) */
6005            if (IS_USER(s))
6006                return;
6007            mask = val = 0;
6008            if (insn & (1 << 19)) {
6009                if (insn & (1 << 8))
6010                    mask |= CPSR_A;
6011                if (insn & (1 << 7))
6012                    mask |= CPSR_I;
6013                if (insn & (1 << 6))
6014                    mask |= CPSR_F;
6015                if (insn & (1 << 18))
6016                    val |= mask;
6017            }
6018            if (insn & (1 << 17)) {
6019                mask |= CPSR_M;
6020                val |= (insn & 0x1f);
6021            }
6022            if (mask) {
6023                gen_op_movl_T0_im(val);
6024                gen_set_psr_T0(s, mask, 0);
6025            }
6026            return;
6027        }
6028        goto illegal_op;
6029    }
6030    if (cond != 0xe) {
6031#ifdef CONFIG_TRACE
6032        if (tracing) {
6033            /* a non-executed conditional instruction takes */
6034            /* only 1 cycle */
6035            gen_traceTicks(1);
6036            ticks -= 1;
6037        }
6038#endif
6039        /* if not always execute, we generate a conditional jump to
6040           next instruction */
6041        s->condlabel = gen_new_label();
6042        gen_test_cc(cond ^ 1, s->condlabel);
6043        s->condjmp = 1;
6044    }
6045#ifdef CONFIG_TRACE
6046    if (tracing && ticks > 0) {
6047        gen_traceTicks(ticks);
6048    }
6049#endif
6050    if ((insn & 0x0f900000) == 0x03000000) {
6051        if ((insn & (1 << 21)) == 0) {
6052            ARCH(6T2);
6053            rd = (insn >> 12) & 0xf;
6054            val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
6055            if ((insn & (1 << 22)) == 0) {
6056                /* MOVW */
6057                tmp = new_tmp();
6058                tcg_gen_movi_i32(tmp, val);
6059            } else {
6060                /* MOVT */
6061                tmp = load_reg(s, rd);
6062                tcg_gen_ext16u_i32(tmp, tmp);
6063                tcg_gen_ori_i32(tmp, tmp, val << 16);
6064            }
6065            store_reg(s, rd, tmp);
6066        } else {
6067            if (((insn >> 12) & 0xf) != 0xf)
6068                goto illegal_op;
6069            if (((insn >> 16) & 0xf) == 0) {
6070                gen_nop_hint(s, insn & 0xff);
6071            } else {
6072                /* CPSR = immediate */
6073                val = insn & 0xff;
6074                shift = ((insn >> 8) & 0xf) * 2;
6075                if (shift)
6076                    val = (val >> shift) | (val << (32 - shift));
6077                gen_op_movl_T0_im(val);
6078                i = ((insn & (1 << 22)) != 0);
6079                if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
6080                    goto illegal_op;
6081            }
6082        }
6083    } else if ((insn & 0x0f900000) == 0x01000000
6084               && (insn & 0x00000090) != 0x00000090) {
6085        /* miscellaneous instructions */
6086        op1 = (insn >> 21) & 3;
6087        sh = (insn >> 4) & 0xf;
6088        rm = insn & 0xf;
6089        switch (sh) {
6090        case 0x0: /* move program status register */
6091            if (op1 & 1) {
6092                /* PSR = reg */
6093                gen_movl_T0_reg(s, rm);
6094                i = ((op1 & 2) != 0);
6095                if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
6096                    goto illegal_op;
6097            } else {
6098                /* reg = PSR */
6099                rd = (insn >> 12) & 0xf;
6100                if (op1 & 2) {
6101                    if (IS_USER(s))
6102                        goto illegal_op;
6103                    tmp = load_cpu_field(spsr);
6104                } else {
6105                    tmp = new_tmp();
6106                    gen_helper_cpsr_read(tmp);
6107                }
6108                store_reg(s, rd, tmp);
6109            }
6110            break;
6111        case 0x1:
6112            if (op1 == 1) {
6113                /* branch/exchange thumb (bx).  */
6114                tmp = load_reg(s, rm);
6115                gen_bx(s, tmp);
6116            } else if (op1 == 3) {
6117                /* clz */
6118                rd = (insn >> 12) & 0xf;
6119                tmp = load_reg(s, rm);
6120                gen_helper_clz(tmp, tmp);
6121                store_reg(s, rd, tmp);
6122            } else {
6123                goto illegal_op;
6124            }
6125            break;
6126        case 0x2:
6127            if (op1 == 1) {
6128                ARCH(5J); /* bxj */
6129                /* Trivial implementation equivalent to bx.  */
6130                tmp = load_reg(s, rm);
6131                gen_bx(s, tmp);
6132            } else {
6133                goto illegal_op;
6134            }
6135            break;
6136        case 0x3:
6137            if (op1 != 1)
6138              goto illegal_op;
6139
6140            /* branch link/exchange thumb (blx) */
6141            tmp = load_reg(s, rm);
6142            tmp2 = new_tmp();
6143            tcg_gen_movi_i32(tmp2, s->pc);
6144            store_reg(s, 14, tmp2);
6145            gen_bx(s, tmp);
6146            break;
6147        case 0x5: /* saturating add/subtract */
6148            rd = (insn >> 12) & 0xf;
6149            rn = (insn >> 16) & 0xf;
6150            tmp = load_reg(s, rm);
6151            tmp2 = load_reg(s, rn);
6152            if (op1 & 2)
6153                gen_helper_double_saturate(tmp2, tmp2);
6154            if (op1 & 1)
6155                gen_helper_sub_saturate(tmp, tmp, tmp2);
6156            else
6157                gen_helper_add_saturate(tmp, tmp, tmp2);
6158            dead_tmp(tmp2);
6159            store_reg(s, rd, tmp);
6160            break;
6161        case 7: /* bkpt */
6162            gen_set_condexec(s);
6163            gen_set_pc_im(s->pc - 4);
6164            gen_exception(EXCP_BKPT);
6165            s->is_jmp = DISAS_JUMP;
6166            break;
6167        case 0x8: /* signed multiply */
6168        case 0xa:
6169        case 0xc:
6170        case 0xe:
6171            rs = (insn >> 8) & 0xf;
6172            rn = (insn >> 12) & 0xf;
6173            rd = (insn >> 16) & 0xf;
6174            if (op1 == 1) {
6175                /* (32 * 16) >> 16 */
6176                tmp = load_reg(s, rm);
6177                tmp2 = load_reg(s, rs);
6178                if (sh & 4)
6179                    tcg_gen_sari_i32(tmp2, tmp2, 16);
6180                else
6181                    gen_sxth(tmp2);
6182                tmp64 = gen_muls_i64_i32(tmp, tmp2);
6183                tcg_gen_shri_i64(tmp64, tmp64, 16);
6184                tmp = new_tmp();
6185                tcg_gen_trunc_i64_i32(tmp, tmp64);
6186                if ((sh & 2) == 0) {
6187                    tmp2 = load_reg(s, rn);
6188                    gen_helper_add_setq(tmp, tmp, tmp2);
6189                    dead_tmp(tmp2);
6190                }
6191                store_reg(s, rd, tmp);
6192            } else {
6193                /* 16 * 16 */
6194                tmp = load_reg(s, rm);
6195                tmp2 = load_reg(s, rs);
6196                gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
6197                dead_tmp(tmp2);
6198                if (op1 == 2) {
6199                    tmp64 = tcg_temp_new_i64();
6200                    tcg_gen_ext_i32_i64(tmp64, tmp);
6201                    dead_tmp(tmp);
6202                    gen_addq(s, tmp64, rn, rd);
6203                    gen_storeq_reg(s, rn, rd, tmp64);
6204                } else {
6205                    if (op1 == 0) {
6206                        tmp2 = load_reg(s, rn);
6207                        gen_helper_add_setq(tmp, tmp, tmp2);
6208                        dead_tmp(tmp2);
6209                    }
6210                    store_reg(s, rd, tmp);
6211                }
6212            }
6213            break;
6214        default:
6215            goto illegal_op;
6216        }
6217    } else if (((insn & 0x0e000000) == 0 &&
6218                (insn & 0x00000090) != 0x90) ||
6219               ((insn & 0x0e000000) == (1 << 25))) {
6220        int set_cc, logic_cc, shiftop;
6221
6222        op1 = (insn >> 21) & 0xf;
6223        set_cc = (insn >> 20) & 1;
6224        logic_cc = table_logic_cc[op1] & set_cc;
6225
6226        /* data processing instruction */
6227        if (insn & (1 << 25)) {
6228            /* immediate operand */
6229            val = insn & 0xff;
6230            shift = ((insn >> 8) & 0xf) * 2;
6231            if (shift) {
6232                val = (val >> shift) | (val << (32 - shift));
6233            }
6234            tmp2 = new_tmp();
6235            tcg_gen_movi_i32(tmp2, val);
6236            if (logic_cc && shift) {
6237                gen_set_CF_bit31(tmp2);
6238            }
6239        } else {
6240            /* register */
6241            rm = (insn) & 0xf;
6242            tmp2 = load_reg(s, rm);
6243            shiftop = (insn >> 5) & 3;
6244            if (!(insn & (1 << 4))) {
6245                shift = (insn >> 7) & 0x1f;
6246                gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
6247            } else {
6248                rs = (insn >> 8) & 0xf;
6249                tmp = load_reg(s, rs);
6250                gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
6251            }
6252        }
6253        if (op1 != 0x0f && op1 != 0x0d) {
6254            rn = (insn >> 16) & 0xf;
6255            tmp = load_reg(s, rn);
6256        } else {
6257            TCGV_UNUSED(tmp);
6258        }
6259        rd = (insn >> 12) & 0xf;
6260        switch(op1) {
6261        case 0x00:
6262            tcg_gen_and_i32(tmp, tmp, tmp2);
6263            if (logic_cc) {
6264                gen_logic_CC(tmp);
6265            }
6266            store_reg_bx(env, s, rd, tmp);
6267            break;
6268        case 0x01:
6269            tcg_gen_xor_i32(tmp, tmp, tmp2);
6270            if (logic_cc) {
6271                gen_logic_CC(tmp);
6272            }
6273            store_reg_bx(env, s, rd, tmp);
6274            break;
6275        case 0x02:
6276            if (set_cc && rd == 15) {
6277                /* SUBS r15, ... is used for exception return.  */
6278                if (IS_USER(s)) {
6279                    goto illegal_op;
6280                }
6281                gen_helper_sub_cc(tmp, tmp, tmp2);
6282                gen_exception_return(s, tmp);
6283            } else {
6284                if (set_cc) {
6285                    gen_helper_sub_cc(tmp, tmp, tmp2);
6286                } else {
6287                    tcg_gen_sub_i32(tmp, tmp, tmp2);
6288                }
6289                store_reg_bx(env, s, rd, tmp);
6290            }
6291            break;
6292        case 0x03:
6293            if (set_cc) {
6294                gen_helper_sub_cc(tmp, tmp2, tmp);
6295            } else {
6296                tcg_gen_sub_i32(tmp, tmp2, tmp);
6297            }
6298            store_reg_bx(env, s, rd, tmp);
6299            break;
6300        case 0x04:
6301            if (set_cc) {
6302                gen_helper_add_cc(tmp, tmp, tmp2);
6303            } else {
6304                tcg_gen_add_i32(tmp, tmp, tmp2);
6305            }
6306            store_reg_bx(env, s, rd, tmp);
6307            break;
6308        case 0x05:
6309            if (set_cc) {
6310                gen_helper_adc_cc(tmp, tmp, tmp2);
6311            } else {
6312                gen_add_carry(tmp, tmp, tmp2);
6313            }
6314            store_reg_bx(env, s, rd, tmp);
6315            break;
6316        case 0x06:
6317            if (set_cc) {
6318                gen_helper_sbc_cc(tmp, tmp, tmp2);
6319            } else {
6320                gen_sub_carry(tmp, tmp, tmp2);
6321            }
6322            store_reg_bx(env, s, rd, tmp);
6323            break;
6324        case 0x07:
6325            if (set_cc) {
6326                gen_helper_sbc_cc(tmp, tmp2, tmp);
6327            } else {
6328                gen_sub_carry(tmp, tmp2, tmp);
6329            }
6330            store_reg_bx(env, s, rd, tmp);
6331            break;
6332        case 0x08:
6333            if (set_cc) {
6334                tcg_gen_and_i32(tmp, tmp, tmp2);
6335                gen_logic_CC(tmp);
6336            }
6337            dead_tmp(tmp);
6338            break;
6339        case 0x09:
6340            if (set_cc) {
6341                tcg_gen_xor_i32(tmp, tmp, tmp2);
6342                gen_logic_CC(tmp);
6343            }
6344            dead_tmp(tmp);
6345            break;
6346        case 0x0a:
6347            if (set_cc) {
6348                gen_helper_sub_cc(tmp, tmp, tmp2);
6349            }
6350            dead_tmp(tmp);
6351            break;
6352        case 0x0b:
6353            if (set_cc) {
6354                gen_helper_add_cc(tmp, tmp, tmp2);
6355            }
6356            dead_tmp(tmp);
6357            break;
6358        case 0x0c:
6359            tcg_gen_or_i32(tmp, tmp, tmp2);
6360            if (logic_cc) {
6361                gen_logic_CC(tmp);
6362            }
6363            store_reg_bx(env, s, rd, tmp);
6364            break;
6365        case 0x0d:
6366            if (logic_cc && rd == 15) {
6367                /* MOVS r15, ... is used for exception return.  */
6368                if (IS_USER(s)) {
6369                    goto illegal_op;
6370                }
6371                gen_exception_return(s, tmp2);
6372            } else {
6373                if (logic_cc) {
6374                    gen_logic_CC(tmp2);
6375                }
6376                store_reg_bx(env, s, rd, tmp2);
6377            }
6378            break;
6379        case 0x0e:
6380            tcg_gen_bic_i32(tmp, tmp, tmp2);
6381            if (logic_cc) {
6382                gen_logic_CC(tmp);
6383            }
6384            store_reg_bx(env, s, rd, tmp);
6385            break;
6386        default:
6387        case 0x0f:
6388            tcg_gen_not_i32(tmp2, tmp2);
6389            if (logic_cc) {
6390                gen_logic_CC(tmp2);
6391            }
6392            store_reg_bx(env, s, rd, tmp2);
6393            break;
6394        }
6395        if (op1 != 0x0f && op1 != 0x0d) {
6396            dead_tmp(tmp2);
6397        }
6398    } else {
6399        /* other instructions */
6400        op1 = (insn >> 24) & 0xf;
6401        switch(op1) {
6402        case 0x0:
6403        case 0x1:
6404            /* multiplies, extra load/stores */
6405            sh = (insn >> 5) & 3;
6406            if (sh == 0) {
6407                if (op1 == 0x0) {
6408                    rd = (insn >> 16) & 0xf;
6409                    rn = (insn >> 12) & 0xf;
6410                    rs = (insn >> 8) & 0xf;
6411                    rm = (insn) & 0xf;
6412                    op1 = (insn >> 20) & 0xf;
6413                    switch (op1) {
6414                    case 0: case 1: case 2: case 3: case 6:
6415                        /* 32 bit mul */
6416                        tmp = load_reg(s, rs);
6417                        tmp2 = load_reg(s, rm);
6418                        tcg_gen_mul_i32(tmp, tmp, tmp2);
6419                        dead_tmp(tmp2);
6420                        if (insn & (1 << 22)) {
6421                            /* Subtract (mls) */
6422                            ARCH(6T2);
6423                            tmp2 = load_reg(s, rn);
6424                            tcg_gen_sub_i32(tmp, tmp2, tmp);
6425                            dead_tmp(tmp2);
6426                        } else if (insn & (1 << 21)) {
6427                            /* Add */
6428                            tmp2 = load_reg(s, rn);
6429                            tcg_gen_add_i32(tmp, tmp, tmp2);
6430                            dead_tmp(tmp2);
6431                        }
6432                        if (insn & (1 << 20))
6433                            gen_logic_CC(tmp);
6434                        store_reg(s, rd, tmp);
6435                        break;
6436                    default:
6437                        /* 64 bit mul */
6438                        tmp = load_reg(s, rs);
6439                        tmp2 = load_reg(s, rm);
6440                        if (insn & (1 << 22))
6441                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
6442                        else
6443                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
6444                        if (insn & (1 << 21)) /* mult accumulate */
6445                            gen_addq(s, tmp64, rn, rd);
6446                        if (!(insn & (1 << 23))) { /* double accumulate */
6447                            ARCH(6);
6448                            gen_addq_lo(s, tmp64, rn);
6449                            gen_addq_lo(s, tmp64, rd);
6450                        }
6451                        if (insn & (1 << 20))
6452                            gen_logicq_cc(tmp64);
6453                        gen_storeq_reg(s, rn, rd, tmp64);
6454                        break;
6455                    }
6456                } else {
6457                    rn = (insn >> 16) & 0xf;
6458                    rd = (insn >> 12) & 0xf;
6459                    if (insn & (1 << 23)) {
6460                        /* load/store exclusive */
6461                        op1 = (insn >> 21) & 0x3;
6462                        if (op1)
6463                            ARCH(6K);
6464                        else
6465                            ARCH(6);
6466                        gen_movl_T1_reg(s, rn);
6467                        addr = cpu_T[1];
6468                        if (insn & (1 << 20)) {
6469                            gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
6470                            switch (op1) {
6471                            case 0: /* ldrex */
6472                                tmp = gen_ld32(addr, IS_USER(s));
6473                                break;
6474                            case 1: /* ldrexd */
6475                                tmp = gen_ld32(addr, IS_USER(s));
6476                                store_reg(s, rd, tmp);
6477                                tcg_gen_addi_i32(addr, addr, 4);
6478                                tmp = gen_ld32(addr, IS_USER(s));
6479                                rd++;
6480                                break;
6481                            case 2: /* ldrexb */
6482                                tmp = gen_ld8u(addr, IS_USER(s));
6483                                break;
6484                            case 3: /* ldrexh */
6485                                tmp = gen_ld16u(addr, IS_USER(s));
6486                                break;
6487                            default:
6488                                abort();
6489                            }
6490                            store_reg(s, rd, tmp);
6491                        } else {
6492                            int label = gen_new_label();
6493                            rm = insn & 0xf;
6494                            gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
6495                            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
6496                                                0, label);
6497                            tmp = load_reg(s,rm);
6498                            switch (op1) {
6499                            case 0:  /*  strex */
6500                                gen_st32(tmp, addr, IS_USER(s));
6501                                break;
6502                            case 1: /*  strexd */
6503                                gen_st32(tmp, addr, IS_USER(s));
6504                                tcg_gen_addi_i32(addr, addr, 4);
6505                                tmp = load_reg(s, rm + 1);
6506                                gen_st32(tmp, addr, IS_USER(s));
6507                                break;
6508                            case 2: /*  strexb */
6509                                gen_st8(tmp, addr, IS_USER(s));
6510                                break;
6511                            case 3: /* strexh */
6512                                gen_st16(tmp, addr, IS_USER(s));
6513                                break;
6514                            default:
6515                                abort();
6516                            }
6517                            gen_set_label(label);
6518                            gen_movl_reg_T0(s, rd);
6519                        }
6520                    } else {
6521                        /* SWP instruction */
6522                        rm = (insn) & 0xf;
6523
6524                        /* ??? This is not really atomic.  However we know
6525                           we never have multiple CPUs running in parallel,
6526                           so it is good enough.  */
6527                        addr = load_reg(s, rn);
6528                        tmp = load_reg(s, rm);
6529                        if (insn & (1 << 22)) {
6530                            tmp2 = gen_ld8u(addr, IS_USER(s));
6531                            gen_st8(tmp, addr, IS_USER(s));
6532                        } else {
6533                            tmp2 = gen_ld32(addr, IS_USER(s));
6534                            gen_st32(tmp, addr, IS_USER(s));
6535                        }
6536                        dead_tmp(addr);
6537                        store_reg(s, rd, tmp2);
6538                    }
6539                }
6540            } else {
6541                int address_offset;
6542                int load;
6543                /* Misc load/store */
6544                rn = (insn >> 16) & 0xf;
6545                rd = (insn >> 12) & 0xf;
6546                addr = load_reg(s, rn);
6547                if (insn & (1 << 24))
6548                    gen_add_datah_offset(s, insn, 0, addr);
6549                address_offset = 0;
6550                if (insn & (1 << 20)) {
6551                    /* load */
6552                    switch(sh) {
6553                    case 1:
6554                        tmp = gen_ld16u(addr, IS_USER(s));
6555                        break;
6556                    case 2:
6557                        tmp = gen_ld8s(addr, IS_USER(s));
6558                        break;
6559                    default:
6560                    case 3:
6561                        tmp = gen_ld16s(addr, IS_USER(s));
6562                        break;
6563                    }
6564                    load = 1;
6565                } else if (sh & 2) {
6566                    /* doubleword */
6567                    if (sh & 1) {
6568                        /* store */
6569                        tmp = load_reg(s, rd);
6570                        gen_st32(tmp, addr, IS_USER(s));
6571                        tcg_gen_addi_i32(addr, addr, 4);
6572                        tmp = load_reg(s, rd + 1);
6573                        gen_st32(tmp, addr, IS_USER(s));
6574                        load = 0;
6575                    } else {
6576                        /* load */
6577                        tmp = gen_ld32(addr, IS_USER(s));
6578                        store_reg(s, rd, tmp);
6579                        tcg_gen_addi_i32(addr, addr, 4);
6580                        tmp = gen_ld32(addr, IS_USER(s));
6581                        rd++;
6582                        load = 1;
6583                    }
6584                    address_offset = -4;
6585                } else {
6586                    /* store */
6587                    tmp = load_reg(s, rd);
6588                    gen_st16(tmp, addr, IS_USER(s));
6589                    load = 0;
6590                }
6591                /* Perform base writeback before the loaded value to
6592                   ensure correct behavior with overlapping index registers.
                   ldrd with base writeback is undefined if the
6594                   destination and index registers overlap.  */
6595                if (!(insn & (1 << 24))) {
6596                    gen_add_datah_offset(s, insn, address_offset, addr);
6597                    store_reg(s, rn, addr);
6598                } else if (insn & (1 << 21)) {
6599                    if (address_offset)
6600                        tcg_gen_addi_i32(addr, addr, address_offset);
6601                    store_reg(s, rn, addr);
6602                } else {
6603                    dead_tmp(addr);
6604                }
6605                if (load) {
6606                    /* Complete the load.  */
6607                    store_reg(s, rd, tmp);
6608                }
6609            }
6610            break;
6611        case 0x4:
6612        case 0x5:
6613            goto do_ldst;
6614        case 0x6:
6615        case 0x7:
6616            if (insn & (1 << 4)) {
6617                ARCH(6);
6618                /* Armv6 Media instructions.  */
6619                rm = insn & 0xf;
6620                rn = (insn >> 16) & 0xf;
6621                rd = (insn >> 12) & 0xf;
6622                rs = (insn >> 8) & 0xf;
6623                switch ((insn >> 23) & 3) {
6624                case 0: /* Parallel add/subtract.  */
6625                    op1 = (insn >> 20) & 7;
6626                    tmp = load_reg(s, rn);
6627                    tmp2 = load_reg(s, rm);
6628                    sh = (insn >> 5) & 7;
6629                    if ((op1 & 3) == 0 || sh == 5 || sh == 6)
6630                        goto illegal_op;
6631                    gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
6632                    dead_tmp(tmp2);
6633                    store_reg(s, rd, tmp);
6634                    break;
6635                case 1:
6636                    if ((insn & 0x00700020) == 0) {
6637                        /* Halfword pack.  */
6638                        tmp = load_reg(s, rn);
6639                        tmp2 = load_reg(s, rm);
6640                        shift = (insn >> 7) & 0x1f;
6641                        if (insn & (1 << 6)) {
6642                            /* pkhtb */
6643                            if (shift == 0)
6644                                shift = 31;
6645                            tcg_gen_sari_i32(tmp2, tmp2, shift);
6646                            tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
6647                            tcg_gen_ext16u_i32(tmp2, tmp2);
6648                        } else {
6649                            /* pkhbt */
6650                            if (shift)
6651                                tcg_gen_shli_i32(tmp2, tmp2, shift);
6652                            tcg_gen_ext16u_i32(tmp, tmp);
6653                            tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
6654                        }
6655                        tcg_gen_or_i32(tmp, tmp, tmp2);
6656                        dead_tmp(tmp2);
6657                        store_reg(s, rd, tmp);
6658                    } else if ((insn & 0x00200020) == 0x00200000) {
6659                        /* [us]sat */
6660                        tmp = load_reg(s, rm);
6661                        shift = (insn >> 7) & 0x1f;
6662                        if (insn & (1 << 6)) {
6663                            if (shift == 0)
6664                                shift = 31;
6665                            tcg_gen_sari_i32(tmp, tmp, shift);
6666                        } else {
6667                            tcg_gen_shli_i32(tmp, tmp, shift);
6668                        }
6669                        sh = (insn >> 16) & 0x1f;
6670                        if (sh != 0) {
6671                            if (insn & (1 << 22))
6672                                gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
6673                            else
6674                                gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
6675                        }
6676                        store_reg(s, rd, tmp);
6677                    } else if ((insn & 0x00300fe0) == 0x00200f20) {
6678                        /* [us]sat16 */
6679                        tmp = load_reg(s, rm);
6680                        sh = (insn >> 16) & 0x1f;
6681                        if (sh != 0) {
6682                            if (insn & (1 << 22))
6683                                gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
6684                            else
6685                                gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
6686                        }
6687                        store_reg(s, rd, tmp);
6688                    } else if ((insn & 0x00700fe0) == 0x00000fa0) {
6689                        /* Select bytes.  */
6690                        tmp = load_reg(s, rn);
6691                        tmp2 = load_reg(s, rm);
6692                        tmp3 = new_tmp();
6693                        tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
6694                        gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
6695                        dead_tmp(tmp3);
6696                        dead_tmp(tmp2);
6697                        store_reg(s, rd, tmp);
6698                    } else if ((insn & 0x000003e0) == 0x00000060) {
6699                        tmp = load_reg(s, rm);
6700                        shift = (insn >> 10) & 3;
                        /* ??? In many cases it's not necessary to do a
6702                           rotate, a shift is sufficient.  */
6703                        if (shift != 0)
6704                            tcg_gen_rori_i32(tmp, tmp, shift * 8);
6705                        op1 = (insn >> 20) & 7;
6706                        switch (op1) {
6707                        case 0: gen_sxtb16(tmp);  break;
6708                        case 2: gen_sxtb(tmp);    break;
6709                        case 3: gen_sxth(tmp);    break;
6710                        case 4: gen_uxtb16(tmp);  break;
6711                        case 6: gen_uxtb(tmp);    break;
6712                        case 7: gen_uxth(tmp);    break;
6713                        default: goto illegal_op;
6714                        }
6715                        if (rn != 15) {
6716                            tmp2 = load_reg(s, rn);
6717                            if ((op1 & 3) == 0) {
6718                                gen_add16(tmp, tmp2);
6719                            } else {
6720                                tcg_gen_add_i32(tmp, tmp, tmp2);
6721                                dead_tmp(tmp2);
6722                            }
6723                        }
6724                        store_reg(s, rd, tmp);
6725                    } else if ((insn & 0x003f0f60) == 0x003f0f20) {
6726                        /* rev */
6727                        tmp = load_reg(s, rm);
6728                        if (insn & (1 << 22)) {
6729                            if (insn & (1 << 7)) {
6730                                gen_revsh(tmp);
6731                            } else {
6732                                ARCH(6T2);
6733                                gen_helper_rbit(tmp, tmp);
6734                            }
6735                        } else {
6736                            if (insn & (1 << 7))
6737                                gen_rev16(tmp);
6738                            else
6739                                tcg_gen_bswap32_i32(tmp, tmp);
6740                        }
6741                        store_reg(s, rd, tmp);
6742                    } else {
6743                        goto illegal_op;
6744                    }
6745                    break;
6746                case 2: /* Multiplies (Type 3).  */
6747                    tmp = load_reg(s, rm);
6748                    tmp2 = load_reg(s, rs);
6749                    if (insn & (1 << 20)) {
6750                        /* Signed multiply most significant [accumulate].  */
6751                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
6752                        if (insn & (1 << 5))
6753                            tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
6754                        tcg_gen_shri_i64(tmp64, tmp64, 32);
6755                        tmp = new_tmp();
6756                        tcg_gen_trunc_i64_i32(tmp, tmp64);
6757                        if (rd != 15) {
6758                            tmp2 = load_reg(s, rd);
6759                            if (insn & (1 << 6)) {
6760                                tcg_gen_sub_i32(tmp, tmp, tmp2);
6761                            } else {
6762                                tcg_gen_add_i32(tmp, tmp, tmp2);
6763                            }
6764                            dead_tmp(tmp2);
6765                        }
6766                        store_reg(s, rn, tmp);
6767                    } else {
6768                        if (insn & (1 << 5))
6769                            gen_swap_half(tmp2);
6770                        gen_smul_dual(tmp, tmp2);
6771                        /* This addition cannot overflow.  */
6772                        if (insn & (1 << 6)) {
6773                            tcg_gen_sub_i32(tmp, tmp, tmp2);
6774                        } else {
6775                            tcg_gen_add_i32(tmp, tmp, tmp2);
6776                        }
6777                        dead_tmp(tmp2);
6778                        if (insn & (1 << 22)) {
6779                            /* smlald, smlsld */
6780                            tmp64 = tcg_temp_new_i64();
6781                            tcg_gen_ext_i32_i64(tmp64, tmp);
6782                            dead_tmp(tmp);
6783                            gen_addq(s, tmp64, rd, rn);
6784                            gen_storeq_reg(s, rd, rn, tmp64);
6785                        } else {
6786                            /* smuad, smusd, smlad, smlsd */
6787                            if (rd != 15)
6788                              {
6789                                tmp2 = load_reg(s, rd);
6790                                gen_helper_add_setq(tmp, tmp, tmp2);
6791                                dead_tmp(tmp2);
6792                              }
6793                            store_reg(s, rn, tmp);
6794                        }
6795                    }
6796                    break;
6797                case 3:
6798                    op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
6799                    switch (op1) {
6800                    case 0: /* Unsigned sum of absolute differences.  */
6801                        ARCH(6);
6802                        tmp = load_reg(s, rm);
6803                        tmp2 = load_reg(s, rs);
6804                        gen_helper_usad8(tmp, tmp, tmp2);
6805                        dead_tmp(tmp2);
6806                        if (rd != 15) {
6807                            tmp2 = load_reg(s, rd);
6808                            tcg_gen_add_i32(tmp, tmp, tmp2);
6809                            dead_tmp(tmp2);
6810                        }
6811                        store_reg(s, rn, tmp);
6812                        break;
6813                    case 0x20: case 0x24: case 0x28: case 0x2c:
6814                        /* Bitfield insert/clear.  */
6815                        ARCH(6T2);
6816                        shift = (insn >> 7) & 0x1f;
6817                        i = (insn >> 16) & 0x1f;
6818                        i = i + 1 - shift;
6819                        if (rm == 15) {
6820                            tmp = new_tmp();
6821                            tcg_gen_movi_i32(tmp, 0);
6822                        } else {
6823                            tmp = load_reg(s, rm);
6824                        }
6825                        if (i != 32) {
6826                            tmp2 = load_reg(s, rd);
6827                            gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
6828                            dead_tmp(tmp2);
6829                        }
6830                        store_reg(s, rd, tmp);
6831                        break;
6832                    case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
6833                    case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
6834                        ARCH(6T2);
6835                        tmp = load_reg(s, rm);
6836                        shift = (insn >> 7) & 0x1f;
6837                        i = ((insn >> 16) & 0x1f) + 1;
6838                        if (shift + i > 32)
6839                            goto illegal_op;
6840                        if (i < 32) {
6841                            if (op1 & 0x20) {
6842                                gen_ubfx(tmp, shift, (1u << i) - 1);
6843                            } else {
6844                                gen_sbfx(tmp, shift, i);
6845                            }
6846                        }
6847                        store_reg(s, rd, tmp);
6848                        break;
6849                    default:
6850                        goto illegal_op;
6851                    }
6852                    break;
6853                }
6854                break;
6855            }
6856        do_ldst:
6857            /* Check for undefined extension instructions
6858             * per the ARM Bible IE:
6859             * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
6860             */
6861            sh = (0xf << 20) | (0xf << 4);
6862            if (op1 == 0x7 && ((insn & sh) == sh))
6863            {
6864                goto illegal_op;
6865            }
6866            /* load/store byte/word */
6867            rn = (insn >> 16) & 0xf;
6868            rd = (insn >> 12) & 0xf;
6869            tmp2 = load_reg(s, rn);
6870            i = (IS_USER(s) || (insn & 0x01200000) == 0x00200000);
6871            if (insn & (1 << 24))
6872                gen_add_data_offset(s, insn, tmp2);
6873            if (insn & (1 << 20)) {
6874                /* load */
6875                if (insn & (1 << 22)) {
6876                    tmp = gen_ld8u(tmp2, i);
6877                } else {
6878                    tmp = gen_ld32(tmp2, i);
6879                }
6880            } else {
6881                /* store */
6882                tmp = load_reg(s, rd);
6883                if (insn & (1 << 22))
6884                    gen_st8(tmp, tmp2, i);
6885                else
6886                    gen_st32(tmp, tmp2, i);
6887            }
6888            if (!(insn & (1 << 24))) {
6889                gen_add_data_offset(s, insn, tmp2);
6890                store_reg(s, rn, tmp2);
6891            } else if (insn & (1 << 21)) {
6892                store_reg(s, rn, tmp2);
6893            } else {
6894                dead_tmp(tmp2);
6895            }
6896            if (insn & (1 << 20)) {
6897                /* Complete the load.  */
6898                if (rd == 15)
6899                    gen_bx(s, tmp);
6900                else
6901                    store_reg(s, rd, tmp);
6902            }
6903            break;
6904        case 0x08:
6905        case 0x09:
6906            {
6907                int j, n, user, loaded_base;
6908                TCGv loaded_var;
6909                /* load/store multiple words */
6910                /* XXX: store correct base if write back */
6911                user = 0;
6912                if (insn & (1 << 22)) {
6913                    if (IS_USER(s))
6914                        goto illegal_op; /* only usable in supervisor mode */
6915
6916                    if ((insn & (1 << 15)) == 0)
6917                        user = 1;
6918                }
6919                rn = (insn >> 16) & 0xf;
6920                addr = load_reg(s, rn);
6921
6922                /* compute total size */
6923                loaded_base = 0;
6924                TCGV_UNUSED(loaded_var);
6925                n = 0;
6926                for(i=0;i<16;i++) {
6927                    if (insn & (1 << i))
6928                        n++;
6929                }
6930                /* XXX: test invalid n == 0 case ? */
6931                if (insn & (1 << 23)) {
6932                    if (insn & (1 << 24)) {
6933                        /* pre increment */
6934                        tcg_gen_addi_i32(addr, addr, 4);
6935                    } else {
6936                        /* post increment */
6937                    }
6938                } else {
6939                    if (insn & (1 << 24)) {
6940                        /* pre decrement */
6941                        tcg_gen_addi_i32(addr, addr, -(n * 4));
6942                    } else {
6943                        /* post decrement */
6944                        if (n != 1)
6945                        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
6946                    }
6947                }
6948                j = 0;
6949                for(i=0;i<16;i++) {
6950                    if (insn & (1 << i)) {
6951                        if (insn & (1 << 20)) {
6952                            /* load */
6953                            tmp = gen_ld32(addr, IS_USER(s));
6954                            if (i == 15) {
6955                                gen_bx(s, tmp);
6956                            } else if (user) {
6957                                gen_helper_set_user_reg(tcg_const_i32(i), tmp);
6958                                dead_tmp(tmp);
6959                            } else if (i == rn) {
6960                                loaded_var = tmp;
6961                                loaded_base = 1;
6962                            } else {
6963                                store_reg(s, i, tmp);
6964                            }
6965                        } else {
6966                            /* store */
6967                            if (i == 15) {
6968                                /* special case: r15 = PC + 8 */
6969                                val = (long)s->pc + 4;
6970                                tmp = new_tmp();
6971                                tcg_gen_movi_i32(tmp, val);
6972                            } else if (user) {
6973                                tmp = new_tmp();
6974                                gen_helper_get_user_reg(tmp, tcg_const_i32(i));
6975                            } else {
6976                                tmp = load_reg(s, i);
6977                            }
6978                            gen_st32(tmp, addr, IS_USER(s));
6979                        }
6980                        j++;
6981                        /* no need to add after the last transfer */
6982                        if (j != n)
6983                            tcg_gen_addi_i32(addr, addr, 4);
6984                    }
6985                }
6986                if (insn & (1 << 21)) {
6987                    /* write back */
6988                    if (insn & (1 << 23)) {
6989                        if (insn & (1 << 24)) {
6990                            /* pre increment */
6991                        } else {
6992                            /* post increment */
6993                            tcg_gen_addi_i32(addr, addr, 4);
6994                        }
6995                    } else {
6996                        if (insn & (1 << 24)) {
6997                            /* pre decrement */
6998                            if (n != 1)
6999                                tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7000                        } else {
7001                            /* post decrement */
7002                            tcg_gen_addi_i32(addr, addr, -(n * 4));
7003                        }
7004                    }
7005                    store_reg(s, rn, addr);
7006                } else {
7007                    dead_tmp(addr);
7008                }
7009                if (loaded_base) {
7010                    store_reg(s, rn, loaded_var);
7011                }
7012                if ((insn & (1 << 22)) && !user) {
7013                    /* Restore CPSR from SPSR.  */
7014                    tmp = load_cpu_field(spsr);
7015                    gen_set_cpsr(tmp, 0xffffffff);
7016                    dead_tmp(tmp);
7017                    s->is_jmp = DISAS_UPDATE;
7018                }
7019            }
7020            break;
7021        case 0xa:
7022        case 0xb:
7023            {
7024                int32_t offset;
7025                /* branch (and link) */
7026                val = (int32_t)s->pc;
7027                if (insn & (1 << 24)) {
7028                    tmp = new_tmp();
7029                    tcg_gen_movi_i32(tmp, val);
7030                    store_reg(s, 14, tmp);
7031                }
7032                offset = (((int32_t)insn << 8) >> 8);
7033                val += (offset << 2) + 4;
7034                gen_jmp(s, val);
7035            }
7036            break;
7037        case 0xc:
7038        case 0xd:
7039        case 0xe:
7040            /* Coprocessor.  */
7041            if (disas_coproc_insn(env, s, insn))
7042                goto illegal_op;
7043            break;
7044        case 0xf:
7045            /* swi */
7046            gen_set_pc_im(s->pc);
7047            s->is_jmp = DISAS_SWI;
7048            break;
7049        default:
7050        illegal_op:
7051            gen_set_condexec(s);
7052            gen_set_pc_im(s->pc - 4);
7053            gen_exception(EXCP_UDEF);
7054            s->is_jmp = DISAS_JUMP;
7055            break;
7056        }
7057    }
7058}
7059
/* Return true if this is a Thumb-2 logical op.  */
static int
thumb2_logic_op(int op)
{
    /* Opcodes below 8 (and, bic, orr, orn, eor, ...) form the logical
       group; everything at or above 8 is arithmetic.  */
    return (op < 8) ? 1 : 0;
}
7066
7067/* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
7068   then set condition code flags based on the result of the operation.
7069   If SHIFTER_OUT is nonzero then set the carry flag for logical operations
7070   to the high bit of T1.
7071   Returns zero if the opcode is valid.  */
7072
7073static int
7074gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out)
7075{
7076    int logic_cc;
7077
7078    logic_cc = 0;
7079    switch (op) {
7080    case 0: /* and */
7081        gen_op_andl_T0_T1();
7082        logic_cc = conds;
7083        break;
7084    case 1: /* bic */
7085        gen_op_bicl_T0_T1();
7086        logic_cc = conds;
7087        break;
7088    case 2: /* orr */
7089        gen_op_orl_T0_T1();
7090        logic_cc = conds;
7091        break;
7092    case 3: /* orn */
7093        gen_op_notl_T1();
7094        gen_op_orl_T0_T1();
7095        logic_cc = conds;
7096        break;
7097    case 4: /* eor */
7098        gen_op_xorl_T0_T1();
7099        logic_cc = conds;
7100        break;
7101    case 8: /* add */
7102        if (conds)
7103            gen_op_addl_T0_T1_cc();
7104        else
7105            gen_op_addl_T0_T1();
7106        break;
7107    case 10: /* adc */
7108        if (conds)
7109            gen_op_adcl_T0_T1_cc();
7110        else
7111            gen_adc_T0_T1();
7112        break;
7113    case 11: /* sbc */
7114        if (conds)
7115            gen_op_sbcl_T0_T1_cc();
7116        else
7117            gen_sbc_T0_T1();
7118        break;
7119    case 13: /* sub */
7120        if (conds)
7121            gen_op_subl_T0_T1_cc();
7122        else
7123            gen_op_subl_T0_T1();
7124        break;
7125    case 14: /* rsb */
7126        if (conds)
7127            gen_op_rsbl_T0_T1_cc();
7128        else
7129            gen_op_rsbl_T0_T1();
7130        break;
7131    default: /* 5, 6, 7, 9, 12, 15. */
7132        return 1;
7133    }
7134    if (logic_cc) {
7135        gen_op_logic_T0_cc();
7136        if (shifter_out)
7137            gen_set_CF_bit31(cpu_T[1]);
7138    }
7139    return 0;
7140}
7141
7142/* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
7143   is not legal.  */
7144static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
7145{
7146    uint32_t insn, imm, shift, offset;
7147    uint32_t rd, rn, rm, rs;
7148    TCGv tmp;
7149    TCGv tmp2;
7150    TCGv tmp3;
7151    TCGv addr;
7152    TCGv_i64 tmp64;
7153    int op;
7154    int shiftop;
7155    int conds;
7156    int logic_cc;
7157
7158    if (!(arm_feature(env, ARM_FEATURE_THUMB2)
7159          || arm_feature (env, ARM_FEATURE_M))) {
7160        /* Thumb-1 cores may need to treat bl and blx as a pair of
7161           16-bit instructions to get correct prefetch abort behavior.  */
7162        insn = insn_hw1;
7163        if ((insn & (1 << 12)) == 0) {
7164            /* Second half of blx.  */
7165            offset = ((insn & 0x7ff) << 1);
7166            tmp = load_reg(s, 14);
7167            tcg_gen_addi_i32(tmp, tmp, offset);
7168            tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
7169
7170            tmp2 = new_tmp();
7171            tcg_gen_movi_i32(tmp2, s->pc | 1);
7172            store_reg(s, 14, tmp2);
7173            gen_bx(s, tmp);
7174            return 0;
7175        }
7176        if (insn & (1 << 11)) {
7177            /* Second half of bl.  */
7178            offset = ((insn & 0x7ff) << 1) | 1;
7179            tmp = load_reg(s, 14);
7180            tcg_gen_addi_i32(tmp, tmp, offset);
7181
7182            tmp2 = new_tmp();
7183            tcg_gen_movi_i32(tmp2, s->pc | 1);
7184            store_reg(s, 14, tmp2);
7185            gen_bx(s, tmp);
7186            return 0;
7187        }
7188        if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
            /* Instruction spans a page boundary.  Implement it as two
               16-bit instructions in case the second half causes a
               prefetch abort.  */
7192            offset = ((int32_t)insn << 21) >> 9;
7193            gen_op_movl_T0_im(s->pc + 2 + offset);
7194            gen_movl_reg_T0(s, 14);
7195            return 0;
7196        }
7197        /* Fall through to 32-bit decode.  */
7198    }
7199
7200    insn = lduw_code(s->pc);
7201#ifdef CONFIG_TRACE
7202    if (tracing) {
7203        int  ticks = get_insn_ticks_thumb(insn);
7204        trace_add_insn( insn_wrap_thumb(insn), 1 );
7205        gen_traceInsn();
7206        gen_traceTicks(ticks);
7207    }
7208#endif
7209
7210    insn |= (uint32_t)insn_hw1 << 16;
7211
7212    s->pc += 2;
7213
7214    if ((insn & 0xf800e800) != 0xf000e800) {
7215        ARCH(6T2);
7216    }
7217
7218    rn = (insn >> 16) & 0xf;
7219    rs = (insn >> 12) & 0xf;
7220    rd = (insn >> 8) & 0xf;
7221    rm = insn & 0xf;
7222    switch ((insn >> 25) & 0xf) {
7223    case 0: case 1: case 2: case 3:
7224        /* 16-bit instructions.  Should never happen.  */
7225        abort();
7226    case 4:
7227        if (insn & (1 << 22)) {
7228            /* Other load/store, table branch.  */
7229            if (insn & 0x01200000) {
7230                /* Load/store doubleword.  */
7231                if (rn == 15) {
7232                    addr = new_tmp();
7233                    tcg_gen_movi_i32(addr, s->pc & ~3);
7234                } else {
7235                    addr = load_reg(s, rn);
7236                }
7237                offset = (insn & 0xff) * 4;
7238                if ((insn & (1 << 23)) == 0)
7239                    offset = -offset;
7240                if (insn & (1 << 24)) {
7241                    tcg_gen_addi_i32(addr, addr, offset);
7242                    offset = 0;
7243                }
7244                if (insn & (1 << 20)) {
7245                    /* ldrd */
7246                    tmp = gen_ld32(addr, IS_USER(s));
7247                    store_reg(s, rs, tmp);
7248                    tcg_gen_addi_i32(addr, addr, 4);
7249                    tmp = gen_ld32(addr, IS_USER(s));
7250                    store_reg(s, rd, tmp);
7251                } else {
7252                    /* strd */
7253                    tmp = load_reg(s, rs);
7254                    gen_st32(tmp, addr, IS_USER(s));
7255                    tcg_gen_addi_i32(addr, addr, 4);
7256                    tmp = load_reg(s, rd);
7257                    gen_st32(tmp, addr, IS_USER(s));
7258                }
7259                if (insn & (1 << 21)) {
7260                    /* Base writeback.  */
7261                    if (rn == 15)
7262                        goto illegal_op;
7263                    tcg_gen_addi_i32(addr, addr, offset - 4);
7264                    store_reg(s, rn, addr);
7265                } else {
7266                    dead_tmp(addr);
7267                }
7268            } else if ((insn & (1 << 23)) == 0) {
7269                /* Load/store exclusive word.  */
7270                gen_movl_T1_reg(s, rn);
7271                addr = cpu_T[1];
7272                if (insn & (1 << 20)) {
7273                    gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
7274                    tmp = gen_ld32(addr, IS_USER(s));
7275                    store_reg(s, rd, tmp);
7276                } else {
7277                    int label = gen_new_label();
7278                    gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7279                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
7280                                        0, label);
7281                    tmp = load_reg(s, rs);
7282                    gen_st32(tmp, cpu_T[1], IS_USER(s));
7283                    gen_set_label(label);
7284                    gen_movl_reg_T0(s, rd);
7285                }
7286            } else if ((insn & (1 << 6)) == 0) {
7287                /* Table Branch.  */
7288                if (rn == 15) {
7289                    addr = new_tmp();
7290                    tcg_gen_movi_i32(addr, s->pc);
7291                } else {
7292                    addr = load_reg(s, rn);
7293                }
7294                tmp = load_reg(s, rm);
7295                tcg_gen_add_i32(addr, addr, tmp);
7296                if (insn & (1 << 4)) {
7297                    /* tbh */
7298                    tcg_gen_add_i32(addr, addr, tmp);
7299                    dead_tmp(tmp);
7300                    tmp = gen_ld16u(addr, IS_USER(s));
7301                } else { /* tbb */
7302                    dead_tmp(tmp);
7303                    tmp = gen_ld8u(addr, IS_USER(s));
7304                }
7305                dead_tmp(addr);
7306                tcg_gen_shli_i32(tmp, tmp, 1);
7307                tcg_gen_addi_i32(tmp, tmp, s->pc);
7308                store_reg(s, 15, tmp);
7309            } else {
7310                /* Load/store exclusive byte/halfword/doubleword.  */
7311                /* ??? These are not really atomic.  However we know
7312                   we never have multiple CPUs running in parallel,
7313                   so it is good enough.  */
7314                op = (insn >> 4) & 0x3;
7315                /* Must use a global reg for the address because we have
7316                   a conditional branch in the store instruction.  */
7317                gen_movl_T1_reg(s, rn);
7318                addr = cpu_T[1];
7319                if (insn & (1 << 20)) {
7320                    gen_helper_mark_exclusive(cpu_env, addr);
7321                    switch (op) {
7322                    case 0:
7323                        tmp = gen_ld8u(addr, IS_USER(s));
7324                        break;
7325                    case 1:
7326                        tmp = gen_ld16u(addr, IS_USER(s));
7327                        break;
7328                    case 3:
7329                        tmp = gen_ld32(addr, IS_USER(s));
7330                        tcg_gen_addi_i32(addr, addr, 4);
7331                        tmp2 = gen_ld32(addr, IS_USER(s));
7332                        store_reg(s, rd, tmp2);
7333                        break;
7334                    default:
7335                        goto illegal_op;
7336                    }
7337                    store_reg(s, rs, tmp);
7338                } else {
7339                    int label = gen_new_label();
7340                    /* Must use a global that is not killed by the branch.  */
7341                    gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7342                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label);
7343                    tmp = load_reg(s, rs);
7344                    switch (op) {
7345                    case 0:
7346                        gen_st8(tmp, addr, IS_USER(s));
7347                        break;
7348                    case 1:
7349                        gen_st16(tmp, addr, IS_USER(s));
7350                        break;
7351                    case 3:
7352                        gen_st32(tmp, addr, IS_USER(s));
7353                        tcg_gen_addi_i32(addr, addr, 4);
7354                        tmp = load_reg(s, rd);
7355                        gen_st32(tmp, addr, IS_USER(s));
7356                        break;
7357                    default:
7358                        goto illegal_op;
7359                    }
7360                    gen_set_label(label);
7361                    gen_movl_reg_T0(s, rm);
7362                }
7363            }
7364        } else {
7365            /* Load/store multiple, RFE, SRS.  */
7366            if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
7367                /* Not available in user mode.  */
7368                if (IS_USER(s))
7369                    goto illegal_op;
7370                if (insn & (1 << 20)) {
7371                    /* rfe */
7372                    addr = load_reg(s, rn);
7373                    if ((insn & (1 << 24)) == 0)
7374                        tcg_gen_addi_i32(addr, addr, -8);
7375                    /* Load PC into tmp and CPSR into tmp2.  */
7376                    tmp = gen_ld32(addr, 0);
7377                    tcg_gen_addi_i32(addr, addr, 4);
7378                    tmp2 = gen_ld32(addr, 0);
7379                    if (insn & (1 << 21)) {
7380                        /* Base writeback.  */
7381                        if (insn & (1 << 24)) {
7382                            tcg_gen_addi_i32(addr, addr, 4);
7383                        } else {
7384                            tcg_gen_addi_i32(addr, addr, -4);
7385                        }
7386                        store_reg(s, rn, addr);
7387                    } else {
7388                        dead_tmp(addr);
7389                    }
7390                    gen_rfe(s, tmp, tmp2);
7391                } else {
7392                    /* srs */
7393                    op = (insn & 0x1f);
7394                    if (op == (env->uncached_cpsr & CPSR_M)) {
7395                        addr = load_reg(s, 13);
7396                    } else {
7397                        addr = new_tmp();
7398                        gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op));
7399                    }
7400                    if ((insn & (1 << 24)) == 0) {
7401                        tcg_gen_addi_i32(addr, addr, -8);
7402                    }
7403                    tmp = load_reg(s, 14);
7404                    gen_st32(tmp, addr, 0);
7405                    tcg_gen_addi_i32(addr, addr, 4);
7406                    tmp = new_tmp();
7407                    gen_helper_cpsr_read(tmp);
7408                    gen_st32(tmp, addr, 0);
7409                    if (insn & (1 << 21)) {
7410                        if ((insn & (1 << 24)) == 0) {
7411                            tcg_gen_addi_i32(addr, addr, -4);
7412                        } else {
7413                            tcg_gen_addi_i32(addr, addr, 4);
7414                        }
7415                        if (op == (env->uncached_cpsr & CPSR_M)) {
7416                            store_reg(s, 13, addr);
7417                        } else {
7418                            gen_helper_set_r13_banked(cpu_env,
7419                                tcg_const_i32(op), addr);
7420                        }
7421                    } else {
7422                        dead_tmp(addr);
7423                    }
7424                }
7425            } else {
7426                int i;
7427                /* Load/store multiple.  */
7428                addr = load_reg(s, rn);
7429                offset = 0;
7430                for (i = 0; i < 16; i++) {
7431                    if (insn & (1 << i))
7432                        offset += 4;
7433                }
7434                if (insn & (1 << 24)) {
7435                    tcg_gen_addi_i32(addr, addr, -offset);
7436                }
7437
7438                for (i = 0; i < 16; i++) {
7439                    if ((insn & (1 << i)) == 0)
7440                        continue;
7441                    if (insn & (1 << 20)) {
7442                        /* Load.  */
7443                        tmp = gen_ld32(addr, IS_USER(s));
7444                        if (i == 15) {
7445                            gen_bx(s, tmp);
7446                        } else {
7447                            store_reg(s, i, tmp);
7448                        }
7449                    } else {
7450                        /* Store.  */
7451                        tmp = load_reg(s, i);
7452                        gen_st32(tmp, addr, IS_USER(s));
7453                    }
7454                    tcg_gen_addi_i32(addr, addr, 4);
7455                }
7456                if (insn & (1 << 21)) {
7457                    /* Base register writeback.  */
7458                    if (insn & (1 << 24)) {
7459                        tcg_gen_addi_i32(addr, addr, -offset);
7460                    }
7461                    /* Fault if writeback register is in register list.  */
7462                    if (insn & (1 << rn))
7463                        goto illegal_op;
7464                    store_reg(s, rn, addr);
7465                } else {
7466                    dead_tmp(addr);
7467                }
7468            }
7469        }
7470        break;
7471    case 5: /* Data processing register constant shift.  */
7472        if (rn == 15)
7473            gen_op_movl_T0_im(0);
7474        else
7475            gen_movl_T0_reg(s, rn);
7476        gen_movl_T1_reg(s, rm);
7477        op = (insn >> 21) & 0xf;
7478        shiftop = (insn >> 4) & 3;
7479        shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7480        conds = (insn & (1 << 20)) != 0;
7481        logic_cc = (conds && thumb2_logic_op(op));
7482        gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
7483        if (gen_thumb2_data_op(s, op, conds, 0))
7484            goto illegal_op;
7485        if (rd != 15)
7486            gen_movl_reg_T0(s, rd);
7487        break;
7488    case 13: /* Misc data processing.  */
7489        op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
7490        if (op < 4 && (insn & 0xf000) != 0xf000)
7491            goto illegal_op;
7492        switch (op) {
7493        case 0: /* Register controlled shift.  */
7494            tmp = load_reg(s, rn);
7495            tmp2 = load_reg(s, rm);
7496            if ((insn & 0x70) != 0)
7497                goto illegal_op;
7498            op = (insn >> 21) & 3;
7499            logic_cc = (insn & (1 << 20)) != 0;
7500            gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
7501            if (logic_cc)
7502                gen_logic_CC(tmp);
7503            store_reg_bx(env, s, rd, tmp);
7504            break;
7505        case 1: /* Sign/zero extend.  */
7506            tmp = load_reg(s, rm);
7507            shift = (insn >> 4) & 3;
            /* ??? In many cases it's not necessary to do a
               rotate, a shift is sufficient.  */
7510            if (shift != 0)
7511                tcg_gen_rori_i32(tmp, tmp, shift * 8);
7512            op = (insn >> 20) & 7;
7513            switch (op) {
7514            case 0: gen_sxth(tmp);   break;
7515            case 1: gen_uxth(tmp);   break;
7516            case 2: gen_sxtb16(tmp); break;
7517            case 3: gen_uxtb16(tmp); break;
7518            case 4: gen_sxtb(tmp);   break;
7519            case 5: gen_uxtb(tmp);   break;
7520            default: goto illegal_op;
7521            }
7522            if (rn != 15) {
7523                tmp2 = load_reg(s, rn);
7524                if ((op >> 1) == 1) {
7525                    gen_add16(tmp, tmp2);
7526                } else {
7527                    tcg_gen_add_i32(tmp, tmp, tmp2);
7528                    dead_tmp(tmp2);
7529                }
7530            }
7531            store_reg(s, rd, tmp);
7532            break;
7533        case 2: /* SIMD add/subtract.  */
7534            op = (insn >> 20) & 7;
7535            shift = (insn >> 4) & 7;
7536            if ((op & 3) == 3 || (shift & 3) == 3)
7537                goto illegal_op;
7538            tmp = load_reg(s, rn);
7539            tmp2 = load_reg(s, rm);
7540            gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
7541            dead_tmp(tmp2);
7542            store_reg(s, rd, tmp);
7543            break;
7544        case 3: /* Other data processing.  */
7545            op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
7546            if (op < 4) {
7547                /* Saturating add/subtract.  */
7548                tmp = load_reg(s, rn);
7549                tmp2 = load_reg(s, rm);
7550                if (op & 2)
7551                    gen_helper_double_saturate(tmp, tmp);
7552                if (op & 1)
7553                    gen_helper_sub_saturate(tmp, tmp2, tmp);
7554                else
7555                    gen_helper_add_saturate(tmp, tmp, tmp2);
7556                dead_tmp(tmp2);
7557            } else {
7558                tmp = load_reg(s, rn);
7559                switch (op) {
7560                case 0x0a: /* rbit */
7561                    gen_helper_rbit(tmp, tmp);
7562                    break;
7563                case 0x08: /* rev */
7564                    tcg_gen_bswap32_i32(tmp, tmp);
7565                    break;
7566                case 0x09: /* rev16 */
7567                    gen_rev16(tmp);
7568                    break;
7569                case 0x0b: /* revsh */
7570                    gen_revsh(tmp);
7571                    break;
7572                case 0x10: /* sel */
7573                    tmp2 = load_reg(s, rm);
7574                    tmp3 = new_tmp();
7575                    tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
7576                    gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
7577                    dead_tmp(tmp3);
7578                    dead_tmp(tmp2);
7579                    break;
7580                case 0x18: /* clz */
7581                    gen_helper_clz(tmp, tmp);
7582                    break;
7583                default:
7584                    goto illegal_op;
7585                }
7586            }
7587            store_reg(s, rd, tmp);
7588            break;
7589        case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
7590            op = (insn >> 4) & 0xf;
7591            tmp = load_reg(s, rn);
7592            tmp2 = load_reg(s, rm);
7593            switch ((insn >> 20) & 7) {
7594            case 0: /* 32 x 32 -> 32 */
7595                tcg_gen_mul_i32(tmp, tmp, tmp2);
7596                dead_tmp(tmp2);
7597                if (rs != 15) {
7598                    tmp2 = load_reg(s, rs);
7599                    if (op)
7600                        tcg_gen_sub_i32(tmp, tmp2, tmp);
7601                    else
7602                        tcg_gen_add_i32(tmp, tmp, tmp2);
7603                    dead_tmp(tmp2);
7604                }
7605                break;
7606            case 1: /* 16 x 16 -> 32 */
7607                gen_mulxy(tmp, tmp2, op & 2, op & 1);
7608                dead_tmp(tmp2);
7609                if (rs != 15) {
7610                    tmp2 = load_reg(s, rs);
7611                    gen_helper_add_setq(tmp, tmp, tmp2);
7612                    dead_tmp(tmp2);
7613                }
7614                break;
7615            case 2: /* Dual multiply add.  */
7616            case 4: /* Dual multiply subtract.  */
7617                if (op)
7618                    gen_swap_half(tmp2);
7619                gen_smul_dual(tmp, tmp2);
7620                /* This addition cannot overflow.  */
7621                if (insn & (1 << 22)) {
7622                    tcg_gen_sub_i32(tmp, tmp, tmp2);
7623                } else {
7624                    tcg_gen_add_i32(tmp, tmp, tmp2);
7625                }
7626                dead_tmp(tmp2);
7627                if (rs != 15)
7628                  {
7629                    tmp2 = load_reg(s, rs);
7630                    gen_helper_add_setq(tmp, tmp, tmp2);
7631                    dead_tmp(tmp2);
7632                  }
7633                break;
7634            case 3: /* 32 * 16 -> 32msb */
7635                if (op)
7636                    tcg_gen_sari_i32(tmp2, tmp2, 16);
7637                else
7638                    gen_sxth(tmp2);
7639                tmp64 = gen_muls_i64_i32(tmp, tmp2);
7640                tcg_gen_shri_i64(tmp64, tmp64, 16);
7641                tmp = new_tmp();
7642                tcg_gen_trunc_i64_i32(tmp, tmp64);
7643                if (rs != 15)
7644                  {
7645                    tmp2 = load_reg(s, rs);
7646                    gen_helper_add_setq(tmp, tmp, tmp2);
7647                    dead_tmp(tmp2);
7648                  }
7649                break;
7650            case 5: case 6: /* 32 * 32 -> 32msb */
7651                gen_imull(tmp, tmp2);
7652                if (insn & (1 << 5)) {
7653                    gen_roundqd(tmp, tmp2);
7654                    dead_tmp(tmp2);
7655                } else {
7656                    dead_tmp(tmp);
7657                    tmp = tmp2;
7658                }
7659                if (rs != 15) {
7660                    tmp2 = load_reg(s, rs);
7661                    if (insn & (1 << 21)) {
7662                        tcg_gen_add_i32(tmp, tmp, tmp2);
7663                    } else {
7664                        tcg_gen_sub_i32(tmp, tmp2, tmp);
7665                    }
7666                    dead_tmp(tmp2);
7667                }
7668                break;
7669            case 7: /* Unsigned sum of absolute differences.  */
7670                gen_helper_usad8(tmp, tmp, tmp2);
7671                dead_tmp(tmp2);
7672                if (rs != 15) {
7673                    tmp2 = load_reg(s, rs);
7674                    tcg_gen_add_i32(tmp, tmp, tmp2);
7675                    dead_tmp(tmp2);
7676                }
7677                break;
7678            }
7679            store_reg(s, rd, tmp);
7680            break;
7681        case 6: case 7: /* 64-bit multiply, Divide.  */
7682            op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
7683            tmp = load_reg(s, rn);
7684            tmp2 = load_reg(s, rm);
7685            if ((op & 0x50) == 0x10) {
7686                /* sdiv, udiv */
7687                if (!arm_feature(env, ARM_FEATURE_DIV))
7688                    goto illegal_op;
7689                if (op & 0x20)
7690                    gen_helper_udiv(tmp, tmp, tmp2);
7691                else
7692                    gen_helper_sdiv(tmp, tmp, tmp2);
7693                dead_tmp(tmp2);
7694                store_reg(s, rd, tmp);
7695            } else if ((op & 0xe) == 0xc) {
7696                /* Dual multiply accumulate long.  */
7697                if (op & 1)
7698                    gen_swap_half(tmp2);
7699                gen_smul_dual(tmp, tmp2);
7700                if (op & 0x10) {
7701                    tcg_gen_sub_i32(tmp, tmp, tmp2);
7702                } else {
7703                    tcg_gen_add_i32(tmp, tmp, tmp2);
7704                }
7705                dead_tmp(tmp2);
7706                /* BUGFIX */
7707                tmp64 = tcg_temp_new_i64();
7708                tcg_gen_ext_i32_i64(tmp64, tmp);
7709                dead_tmp(tmp);
7710                gen_addq(s, tmp64, rs, rd);
7711                gen_storeq_reg(s, rs, rd, tmp64);
7712            } else {
7713                if (op & 0x20) {
7714                    /* Unsigned 64-bit multiply  */
7715                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7716                } else {
7717                    if (op & 8) {
7718                        /* smlalxy */
7719                        gen_mulxy(tmp, tmp2, op & 2, op & 1);
7720                        dead_tmp(tmp2);
7721                        tmp64 = tcg_temp_new_i64();
7722                        tcg_gen_ext_i32_i64(tmp64, tmp);
7723                        dead_tmp(tmp);
7724                    } else {
7725                        /* Signed 64-bit multiply  */
7726                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
7727                    }
7728                }
7729                if (op & 4) {
7730                    /* umaal */
7731                    gen_addq_lo(s, tmp64, rs);
7732                    gen_addq_lo(s, tmp64, rd);
7733                } else if (op & 0x40) {
7734                    /* 64-bit accumulate.  */
7735                    gen_addq(s, tmp64, rs, rd);
7736                }
7737                gen_storeq_reg(s, rs, rd, tmp64);
7738            }
7739            break;
7740        }
7741        break;
7742    case 6: case 7: case 14: case 15:
7743        /* Coprocessor.  */
7744        if (((insn >> 24) & 3) == 3) {
7745            /* Translate into the equivalent ARM encoding.  */
7746            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
7747            if (disas_neon_data_insn(env, s, insn))
7748                goto illegal_op;
7749        } else {
7750            if (insn & (1 << 28))
7751                goto illegal_op;
7752            if (disas_coproc_insn (env, s, insn))
7753                goto illegal_op;
7754        }
7755        break;
7756    case 8: case 9: case 10: case 11:
7757        if (insn & (1 << 15)) {
7758            /* Branches, misc control.  */
7759            if (insn & 0x5000) {
7760                /* Unconditional branch.  */
7761                /* signextend(hw1[10:0]) -> offset[:12].  */
7762                offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
7763                /* hw1[10:0] -> offset[11:1].  */
7764                offset |= (insn & 0x7ff) << 1;
7765                /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
7766                   offset[24:22] already have the same value because of the
7767                   sign extension above.  */
7768                offset ^= ((~insn) & (1 << 13)) << 10;
7769                offset ^= ((~insn) & (1 << 11)) << 11;
7770
7771                if (insn & (1 << 14)) {
7772                    /* Branch and link.  */
7773                    gen_op_movl_T1_im(s->pc | 1);
7774                    gen_movl_reg_T1(s, 14);
7775                }
7776
7777                offset += s->pc;
7778                if (insn & (1 << 12)) {
7779                    /* b/bl */
7780                    gen_jmp(s, offset);
7781                } else {
7782                    /* blx */
7783                    offset &= ~(uint32_t)2;
7784                    gen_bx_im(s, offset);
7785                }
7786            } else if (((insn >> 23) & 7) == 7) {
7787                /* Misc control */
7788                if (insn & (1 << 13))
7789                    goto illegal_op;
7790
7791                if (insn & (1 << 26)) {
7792                    /* Secure monitor call (v6Z) */
7793                    goto illegal_op; /* not implemented.  */
7794                } else {
7795                    op = (insn >> 20) & 7;
7796                    switch (op) {
7797                    case 0: /* msr cpsr.  */
7798                        if (IS_M(env)) {
7799                            tmp = load_reg(s, rn);
7800                            addr = tcg_const_i32(insn & 0xff);
7801                            gen_helper_v7m_msr(cpu_env, addr, tmp);
7802                            gen_lookup_tb(s);
7803                            break;
7804                        }
7805                        /* fall through */
7806                    case 1: /* msr spsr.  */
7807                        if (IS_M(env))
7808                            goto illegal_op;
7809                        gen_movl_T0_reg(s, rn);
7810                        if (gen_set_psr_T0(s,
7811                              msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
7812                              op == 1))
7813                            goto illegal_op;
7814                        break;
7815                    case 2: /* cps, nop-hint.  */
7816                        if (((insn >> 8) & 7) == 0) {
7817                            gen_nop_hint(s, insn & 0xff);
7818                        }
7819                        /* Implemented as NOP in user mode.  */
7820                        if (IS_USER(s))
7821                            break;
7822                        offset = 0;
7823                        imm = 0;
7824                        if (insn & (1 << 10)) {
7825                            if (insn & (1 << 7))
7826                                offset |= CPSR_A;
7827                            if (insn & (1 << 6))
7828                                offset |= CPSR_I;
7829                            if (insn & (1 << 5))
7830                                offset |= CPSR_F;
7831                            if (insn & (1 << 9))
7832                                imm = CPSR_A | CPSR_I | CPSR_F;
7833                        }
7834                        if (insn & (1 << 8)) {
7835                            offset |= 0x1f;
7836                            imm |= (insn & 0x1f);
7837                        }
7838                        if (offset) {
7839                            gen_op_movl_T0_im(imm);
7840                            gen_set_psr_T0(s, offset, 0);
7841                        }
7842                        break;
7843                    case 3: /* Special control operations.  */
7844                        op = (insn >> 4) & 0xf;
7845                        switch (op) {
7846                        case 2: /* clrex */
7847                            gen_helper_clrex(cpu_env);
7848                            break;
7849                        case 4: /* dsb */
7850                        case 5: /* dmb */
7851                        case 6: /* isb */
7852                            /* These execute as NOPs.  */
7853                            ARCH(7);
7854                            break;
7855                        default:
7856                            goto illegal_op;
7857                        }
7858                        break;
7859                    case 4: /* bxj */
7860                        /* Trivial implementation equivalent to bx.  */
7861                        tmp = load_reg(s, rn);
7862                        gen_bx(s, tmp);
7863                        break;
7864                    case 5: /* Exception return.  */
7865                        /* Unpredictable in user mode.  */
7866                        goto illegal_op;
7867                    case 6: /* mrs cpsr.  */
7868                        tmp = new_tmp();
7869                        if (IS_M(env)) {
7870                            addr = tcg_const_i32(insn & 0xff);
7871                            gen_helper_v7m_mrs(tmp, cpu_env, addr);
7872                        } else {
7873                            gen_helper_cpsr_read(tmp);
7874                        }
7875                        store_reg(s, rd, tmp);
7876                        break;
7877                    case 7: /* mrs spsr.  */
7878                        /* Not accessible in user mode.  */
7879                        if (IS_USER(s) || IS_M(env))
7880                            goto illegal_op;
7881                        tmp = load_cpu_field(spsr);
7882                        store_reg(s, rd, tmp);
7883                        break;
7884                    }
7885                }
7886            } else {
7887                /* Conditional branch.  */
7888                op = (insn >> 22) & 0xf;
7889                /* Generate a conditional jump to next instruction.  */
7890                s->condlabel = gen_new_label();
7891                gen_test_cc(op ^ 1, s->condlabel);
7892                s->condjmp = 1;
7893
7894                /* offset[11:1] = insn[10:0] */
7895                offset = (insn & 0x7ff) << 1;
7896                /* offset[17:12] = insn[21:16].  */
7897                offset |= (insn & 0x003f0000) >> 4;
7898                /* offset[31:20] = insn[26].  */
7899                offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
7900                /* offset[18] = insn[13].  */
7901                offset |= (insn & (1 << 13)) << 5;
7902                /* offset[19] = insn[11].  */
7903                offset |= (insn & (1 << 11)) << 8;
7904
7905                /* jump to the offset */
7906                gen_jmp(s, s->pc + offset);
7907            }
7908        } else {
7909            /* Data processing immediate.  */
7910            if (insn & (1 << 25)) {
7911                if (insn & (1 << 24)) {
7912                    if (insn & (1 << 20))
7913                        goto illegal_op;
7914                    /* Bitfield/Saturate.  */
7915                    op = (insn >> 21) & 7;
7916                    imm = insn & 0x1f;
7917                    shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7918                    if (rn == 15) {
7919                        tmp = new_tmp();
7920                        tcg_gen_movi_i32(tmp, 0);
7921                    } else {
7922                        tmp = load_reg(s, rn);
7923                    }
7924                    switch (op) {
7925                    case 2: /* Signed bitfield extract.  */
7926                        imm++;
7927                        if (shift + imm > 32)
7928                            goto illegal_op;
7929                        if (imm < 32)
7930                            gen_sbfx(tmp, shift, imm);
7931                        break;
7932                    case 6: /* Unsigned bitfield extract.  */
7933                        imm++;
7934                        if (shift + imm > 32)
7935                            goto illegal_op;
7936                        if (imm < 32)
7937                            gen_ubfx(tmp, shift, (1u << imm) - 1);
7938                        break;
7939                    case 3: /* Bitfield insert/clear.  */
7940                        if (imm < shift)
7941                            goto illegal_op;
7942                        imm = imm + 1 - shift;
7943                        if (imm != 32) {
7944                            tmp2 = load_reg(s, rd);
7945                            gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
7946                            dead_tmp(tmp2);
7947                        }
7948                        break;
7949                    case 7:
7950                        goto illegal_op;
7951                    default: /* Saturate.  */
7952                        if (shift) {
7953                            if (op & 1)
7954                                tcg_gen_sari_i32(tmp, tmp, shift);
7955                            else
7956                                tcg_gen_shli_i32(tmp, tmp, shift);
7957                        }
7958                        tmp2 = tcg_const_i32(imm);
7959                        if (op & 4) {
7960                            /* Unsigned.  */
7961                            if ((op & 1) && shift == 0)
7962                                gen_helper_usat16(tmp, tmp, tmp2);
7963                            else
7964                                gen_helper_usat(tmp, tmp, tmp2);
7965                        } else {
7966                            /* Signed.  */
7967                            if ((op & 1) && shift == 0)
7968                                gen_helper_ssat16(tmp, tmp, tmp2);
7969                            else
7970                                gen_helper_ssat(tmp, tmp, tmp2);
7971                        }
7972                        break;
7973                    }
7974                    store_reg(s, rd, tmp);
7975                } else {
7976                    imm = ((insn & 0x04000000) >> 15)
7977                          | ((insn & 0x7000) >> 4) | (insn & 0xff);
7978                    if (insn & (1 << 22)) {
7979                        /* 16-bit immediate.  */
7980                        imm |= (insn >> 4) & 0xf000;
7981                        if (insn & (1 << 23)) {
7982                            /* movt */
7983                            tmp = load_reg(s, rd);
7984                            tcg_gen_ext16u_i32(tmp, tmp);
7985                            tcg_gen_ori_i32(tmp, tmp, imm << 16);
7986                        } else {
7987                            /* movw */
7988                            tmp = new_tmp();
7989                            tcg_gen_movi_i32(tmp, imm);
7990                        }
7991                    } else {
7992                        /* Add/sub 12-bit immediate.  */
7993                        if (rn == 15) {
7994                            offset = s->pc & ~(uint32_t)3;
7995                            if (insn & (1 << 23))
7996                                offset -= imm;
7997                            else
7998                                offset += imm;
7999                            tmp = new_tmp();
8000                            tcg_gen_movi_i32(tmp, offset);
8001                        } else {
8002                            tmp = load_reg(s, rn);
8003                            if (insn & (1 << 23))
8004                                tcg_gen_subi_i32(tmp, tmp, imm);
8005                            else
8006                                tcg_gen_addi_i32(tmp, tmp, imm);
8007                        }
8008                    }
8009                    store_reg(s, rd, tmp);
8010                }
8011            } else {
8012                int shifter_out = 0;
8013                /* modified 12-bit immediate.  */
8014                shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
8015                imm = (insn & 0xff);
8016                switch (shift) {
8017                case 0: /* XY */
8018                    /* Nothing to do.  */
8019                    break;
8020                case 1: /* 00XY00XY */
8021                    imm |= imm << 16;
8022                    break;
8023                case 2: /* XY00XY00 */
8024                    imm |= imm << 16;
8025                    imm <<= 8;
8026                    break;
8027                case 3: /* XYXYXYXY */
8028                    imm |= imm << 16;
8029                    imm |= imm << 8;
8030                    break;
8031                default: /* Rotated constant.  */
8032                    shift = (shift << 1) | (imm >> 7);
8033                    imm |= 0x80;
8034                    imm = imm << (32 - shift);
8035                    shifter_out = 1;
8036                    break;
8037                }
8038                gen_op_movl_T1_im(imm);
8039                rn = (insn >> 16) & 0xf;
8040                if (rn == 15)
8041                    gen_op_movl_T0_im(0);
8042                else
8043                    gen_movl_T0_reg(s, rn);
8044                op = (insn >> 21) & 0xf;
8045                if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
8046                                       shifter_out))
8047                    goto illegal_op;
8048                rd = (insn >> 8) & 0xf;
8049                if (rd != 15) {
8050                    gen_movl_reg_T0(s, rd);
8051                }
8052            }
8053        }
8054        break;
8055    case 12: /* Load/store single data item.  */
8056        {
8057        int postinc = 0;
8058        int writeback = 0;
8059        int user;
8060        if ((insn & 0x01100000) == 0x01000000) {
8061            if (disas_neon_ls_insn(env, s, insn))
8062                goto illegal_op;
8063            break;
8064        }
8065        user = IS_USER(s);
8066        if (rn == 15) {
8067            addr = new_tmp();
8068            /* PC relative.  */
8069            /* s->pc has already been incremented by 4.  */
8070            imm = s->pc & 0xfffffffc;
8071            if (insn & (1 << 23))
8072                imm += insn & 0xfff;
8073            else
8074                imm -= insn & 0xfff;
8075            tcg_gen_movi_i32(addr, imm);
8076        } else {
8077            addr = load_reg(s, rn);
8078            if (insn & (1 << 23)) {
8079                /* Positive offset.  */
8080                imm = insn & 0xfff;
8081                tcg_gen_addi_i32(addr, addr, imm);
8082            } else {
8083                op = (insn >> 8) & 7;
8084                imm = insn & 0xff;
8085                switch (op) {
8086                case 0: case 8: /* Shifted Register.  */
8087                    shift = (insn >> 4) & 0xf;
8088                    if (shift > 3)
8089                        goto illegal_op;
8090                    tmp = load_reg(s, rm);
8091                    if (shift)
8092                        tcg_gen_shli_i32(tmp, tmp, shift);
8093                    tcg_gen_add_i32(addr, addr, tmp);
8094                    dead_tmp(tmp);
8095                    break;
8096                case 4: /* Negative offset.  */
8097                    tcg_gen_addi_i32(addr, addr, -imm);
8098                    break;
8099                case 6: /* User privilege.  */
8100                    tcg_gen_addi_i32(addr, addr, imm);
8101                    user = 1;
8102                    break;
8103                case 1: /* Post-decrement.  */
8104                    imm = -imm;
8105                    /* Fall through.  */
8106                case 3: /* Post-increment.  */
8107                    postinc = 1;
8108                    writeback = 1;
8109                    break;
8110                case 5: /* Pre-decrement.  */
8111                    imm = -imm;
8112                    /* Fall through.  */
8113                case 7: /* Pre-increment.  */
8114                    tcg_gen_addi_i32(addr, addr, imm);
8115                    writeback = 1;
8116                    break;
8117                default:
8118                    goto illegal_op;
8119                }
8120            }
8121        }
8122        op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
8123        if (insn & (1 << 20)) {
8124            /* Load.  */
8125            if (rs == 15 && op != 2) {
8126                if (op & 2)
8127                    goto illegal_op;
8128                /* Memory hint.  Implemented as NOP.  */
8129            } else {
8130                switch (op) {
8131                case 0: tmp = gen_ld8u(addr, user); break;
8132                case 4: tmp = gen_ld8s(addr, user); break;
8133                case 1: tmp = gen_ld16u(addr, user); break;
8134                case 5: tmp = gen_ld16s(addr, user); break;
8135                case 2: tmp = gen_ld32(addr, user); break;
8136                default: goto illegal_op;
8137                }
8138                if (rs == 15) {
8139                    gen_bx(s, tmp);
8140                } else {
8141                    store_reg(s, rs, tmp);
8142                }
8143            }
8144        } else {
8145            /* Store.  */
8146            if (rs == 15)
8147                goto illegal_op;
8148            tmp = load_reg(s, rs);
8149            switch (op) {
8150            case 0: gen_st8(tmp, addr, user); break;
8151            case 1: gen_st16(tmp, addr, user); break;
8152            case 2: gen_st32(tmp, addr, user); break;
8153            default: goto illegal_op;
8154            }
8155        }
8156        if (postinc)
8157            tcg_gen_addi_i32(addr, addr, imm);
8158        if (writeback) {
8159            store_reg(s, rn, addr);
8160        } else {
8161            dead_tmp(addr);
8162        }
8163        }
8164        break;
8165    default:
8166        goto illegal_op;
8167    }
8168    return 0;
8169illegal_op:
8170    return 1;
8171}
8172
8173static void disas_thumb_insn(CPUState *env, DisasContext *s)
8174{
8175    uint32_t val, insn, op, rm, rn, rd, shift, cond;
8176    int32_t offset;
8177    int i;
8178    TCGv tmp;
8179    TCGv tmp2;
8180    TCGv addr;
8181
8182    if (s->condexec_mask) {
8183        cond = s->condexec_cond;
8184        s->condlabel = gen_new_label();
8185        gen_test_cc(cond ^ 1, s->condlabel);
8186        s->condjmp = 1;
8187    }
8188
8189    insn = lduw_code(s->pc);
8190
8191#ifdef CONFIG_MEMCHECK
8192    if (watch_call_stack(s)) {
8193        target_ulong ret_off;
8194        if (is_ret_address(env, s->pc)) {
8195            set_on_ret(s->pc);
8196        }
8197        if (is_thumb_bl_or_blx(insn, s->pc, &ret_off)) {
8198            set_on_call(s->pc, s->pc + ret_off);
8199            if (!s->search_pc) {
8200                register_ret_address(env, s->pc + ret_off);
8201            }
8202        }
8203    }
8204#endif  // CONFIG_MEMCHECK
8205
8206#ifdef CONFIG_TRACE
8207    if (tracing) {
8208        int  ticks = get_insn_ticks_thumb(insn);
8209        trace_add_insn( insn_wrap_thumb(insn), 1 );
8210        gen_traceInsn();
8211        gen_traceTicks(ticks);
8212    }
8213#endif
8214    s->pc += 2;
8215
8216    switch (insn >> 12) {
8217    case 0: case 1:
8218        rd = insn & 7;
8219        op = (insn >> 11) & 3;
8220        if (op == 3) {
8221            /* add/subtract */
8222            rn = (insn >> 3) & 7;
8223            gen_movl_T0_reg(s, rn);
8224            if (insn & (1 << 10)) {
8225                /* immediate */
8226                gen_op_movl_T1_im((insn >> 6) & 7);
8227            } else {
8228                /* reg */
8229                rm = (insn >> 6) & 7;
8230                gen_movl_T1_reg(s, rm);
8231            }
8232            if (insn & (1 << 9)) {
8233                if (s->condexec_mask)
8234                    gen_op_subl_T0_T1();
8235                else
8236                    gen_op_subl_T0_T1_cc();
8237            } else {
8238                if (s->condexec_mask)
8239                    gen_op_addl_T0_T1();
8240                else
8241                    gen_op_addl_T0_T1_cc();
8242            }
8243            gen_movl_reg_T0(s, rd);
8244        } else {
8245            /* shift immediate */
8246            rm = (insn >> 3) & 7;
8247            shift = (insn >> 6) & 0x1f;
8248            tmp = load_reg(s, rm);
8249            gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
8250            if (!s->condexec_mask)
8251                gen_logic_CC(tmp);
8252            store_reg(s, rd, tmp);
8253        }
8254        break;
8255    case 2: case 3:
8256        /* arithmetic large immediate */
8257        op = (insn >> 11) & 3;
8258        rd = (insn >> 8) & 0x7;
8259        if (op == 0) {
8260            gen_op_movl_T0_im(insn & 0xff);
8261        } else {
8262            gen_movl_T0_reg(s, rd);
8263            gen_op_movl_T1_im(insn & 0xff);
8264        }
8265        switch (op) {
8266        case 0: /* mov */
8267            if (!s->condexec_mask)
8268                gen_op_logic_T0_cc();
8269            break;
8270        case 1: /* cmp */
8271            gen_op_subl_T0_T1_cc();
8272            break;
8273        case 2: /* add */
8274            if (s->condexec_mask)
8275                gen_op_addl_T0_T1();
8276            else
8277                gen_op_addl_T0_T1_cc();
8278            break;
8279        case 3: /* sub */
8280            if (s->condexec_mask)
8281                gen_op_subl_T0_T1();
8282            else
8283                gen_op_subl_T0_T1_cc();
8284            break;
8285        }
8286        if (op != 1)
8287            gen_movl_reg_T0(s, rd);
8288        break;
8289    case 4:
8290        if (insn & (1 << 11)) {
8291            rd = (insn >> 8) & 7;
8292            /* load pc-relative.  Bit 1 of PC is ignored.  */
8293            val = s->pc + 2 + ((insn & 0xff) * 4);
8294            val &= ~(uint32_t)2;
8295            addr = new_tmp();
8296            tcg_gen_movi_i32(addr, val);
8297            tmp = gen_ld32(addr, IS_USER(s));
8298            dead_tmp(addr);
8299            store_reg(s, rd, tmp);
8300            break;
8301        }
8302        if (insn & (1 << 10)) {
8303            /* data processing extended or blx */
8304            rd = (insn & 7) | ((insn >> 4) & 8);
8305            rm = (insn >> 3) & 0xf;
8306            op = (insn >> 8) & 3;
8307            switch (op) {
8308            case 0: /* add */
8309                gen_movl_T0_reg(s, rd);
8310                gen_movl_T1_reg(s, rm);
8311                gen_op_addl_T0_T1();
8312                gen_movl_reg_T0(s, rd);
8313                break;
8314            case 1: /* cmp */
8315                gen_movl_T0_reg(s, rd);
8316                gen_movl_T1_reg(s, rm);
8317                gen_op_subl_T0_T1_cc();
8318                break;
8319            case 2: /* mov/cpy */
8320                gen_movl_T0_reg(s, rm);
8321                gen_movl_reg_T0(s, rd);
8322                break;
8323            case 3:/* branch [and link] exchange thumb register */
8324                tmp = load_reg(s, rm);
8325                if (insn & (1 << 7)) {
8326                    val = (uint32_t)s->pc | 1;
8327                    tmp2 = new_tmp();
8328                    tcg_gen_movi_i32(tmp2, val);
8329                    store_reg(s, 14, tmp2);
8330                }
8331                gen_bx(s, tmp);
8332                break;
8333            }
8334            break;
8335        }
8336
8337        /* data processing register */
8338        rd = insn & 7;
8339        rm = (insn >> 3) & 7;
8340        op = (insn >> 6) & 0xf;
8341        if (op == 2 || op == 3 || op == 4 || op == 7) {
8342            /* the shift/rotate ops want the operands backwards */
8343            val = rm;
8344            rm = rd;
8345            rd = val;
8346            val = 1;
8347        } else {
8348            val = 0;
8349        }
8350
8351        if (op == 9) /* neg */
8352            gen_op_movl_T0_im(0);
8353        else if (op != 0xf) /* mvn doesn't read its first operand */
8354            gen_movl_T0_reg(s, rd);
8355
8356        gen_movl_T1_reg(s, rm);
8357        switch (op) {
8358        case 0x0: /* and */
8359            gen_op_andl_T0_T1();
8360            if (!s->condexec_mask)
8361                gen_op_logic_T0_cc();
8362            break;
8363        case 0x1: /* eor */
8364            gen_op_xorl_T0_T1();
8365            if (!s->condexec_mask)
8366                gen_op_logic_T0_cc();
8367            break;
8368        case 0x2: /* lsl */
8369            if (s->condexec_mask) {
8370                gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]);
8371            } else {
8372                gen_helper_shl_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8373                gen_op_logic_T1_cc();
8374            }
8375            break;
8376        case 0x3: /* lsr */
8377            if (s->condexec_mask) {
8378                gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]);
8379            } else {
8380                gen_helper_shr_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8381                gen_op_logic_T1_cc();
8382            }
8383            break;
8384        case 0x4: /* asr */
8385            if (s->condexec_mask) {
8386                gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]);
8387            } else {
8388                gen_helper_sar_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8389                gen_op_logic_T1_cc();
8390            }
8391            break;
8392        case 0x5: /* adc */
8393            if (s->condexec_mask)
8394                gen_adc_T0_T1();
8395            else
8396                gen_op_adcl_T0_T1_cc();
8397            break;
8398        case 0x6: /* sbc */
8399            if (s->condexec_mask)
8400                gen_sbc_T0_T1();
8401            else
8402                gen_op_sbcl_T0_T1_cc();
8403            break;
8404        case 0x7: /* ror */
8405            if (s->condexec_mask) {
8406                gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]);
8407            } else {
8408                gen_helper_ror_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8409                gen_op_logic_T1_cc();
8410            }
8411            break;
8412        case 0x8: /* tst */
8413            gen_op_andl_T0_T1();
8414            gen_op_logic_T0_cc();
8415            rd = 16;
8416            break;
8417        case 0x9: /* neg */
8418            if (s->condexec_mask)
8419                tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
8420            else
8421                gen_op_subl_T0_T1_cc();
8422            break;
8423        case 0xa: /* cmp */
8424            gen_op_subl_T0_T1_cc();
8425            rd = 16;
8426            break;
8427        case 0xb: /* cmn */
8428            gen_op_addl_T0_T1_cc();
8429            rd = 16;
8430            break;
8431        case 0xc: /* orr */
8432            gen_op_orl_T0_T1();
8433            if (!s->condexec_mask)
8434                gen_op_logic_T0_cc();
8435            break;
8436        case 0xd: /* mul */
8437            gen_op_mull_T0_T1();
8438            if (!s->condexec_mask)
8439                gen_op_logic_T0_cc();
8440            break;
8441        case 0xe: /* bic */
8442            gen_op_bicl_T0_T1();
8443            if (!s->condexec_mask)
8444                gen_op_logic_T0_cc();
8445            break;
8446        case 0xf: /* mvn */
8447            gen_op_notl_T1();
8448            if (!s->condexec_mask)
8449                gen_op_logic_T1_cc();
8450            val = 1;
8451            rm = rd;
8452            break;
8453        }
8454        if (rd != 16) {
8455            if (val)
8456                gen_movl_reg_T1(s, rm);
8457            else
8458                gen_movl_reg_T0(s, rd);
8459        }
8460        break;
8461
8462    case 5:
8463        /* load/store register offset.  */
8464        rd = insn & 7;
8465        rn = (insn >> 3) & 7;
8466        rm = (insn >> 6) & 7;
8467        op = (insn >> 9) & 7;
8468        addr = load_reg(s, rn);
8469        tmp = load_reg(s, rm);
8470        tcg_gen_add_i32(addr, addr, tmp);
8471        dead_tmp(tmp);
8472
8473        if (op < 3) /* store */
8474            tmp = load_reg(s, rd);
8475
8476        switch (op) {
8477        case 0: /* str */
8478            gen_st32(tmp, addr, IS_USER(s));
8479            break;
8480        case 1: /* strh */
8481            gen_st16(tmp, addr, IS_USER(s));
8482            break;
8483        case 2: /* strb */
8484            gen_st8(tmp, addr, IS_USER(s));
8485            break;
8486        case 3: /* ldrsb */
8487            tmp = gen_ld8s(addr, IS_USER(s));
8488            break;
8489        case 4: /* ldr */
8490            tmp = gen_ld32(addr, IS_USER(s));
8491            break;
8492        case 5: /* ldrh */
8493            tmp = gen_ld16u(addr, IS_USER(s));
8494            break;
8495        case 6: /* ldrb */
8496            tmp = gen_ld8u(addr, IS_USER(s));
8497            break;
8498        case 7: /* ldrsh */
8499            tmp = gen_ld16s(addr, IS_USER(s));
8500            break;
8501        }
8502        if (op >= 3) /* load */
8503            store_reg(s, rd, tmp);
8504        dead_tmp(addr);
8505        break;
8506
8507    case 6:
8508        /* load/store word immediate offset */
8509        rd = insn & 7;
8510        rn = (insn >> 3) & 7;
8511        addr = load_reg(s, rn);
8512        val = (insn >> 4) & 0x7c;
8513        tcg_gen_addi_i32(addr, addr, val);
8514
8515        if (insn & (1 << 11)) {
8516            /* load */
8517            tmp = gen_ld32(addr, IS_USER(s));
8518            store_reg(s, rd, tmp);
8519        } else {
8520            /* store */
8521            tmp = load_reg(s, rd);
8522            gen_st32(tmp, addr, IS_USER(s));
8523        }
8524        dead_tmp(addr);
8525        break;
8526
8527    case 7:
8528        /* load/store byte immediate offset */
8529        rd = insn & 7;
8530        rn = (insn >> 3) & 7;
8531        addr = load_reg(s, rn);
8532        val = (insn >> 6) & 0x1f;
8533        tcg_gen_addi_i32(addr, addr, val);
8534
8535        if (insn & (1 << 11)) {
8536            /* load */
8537            tmp = gen_ld8u(addr, IS_USER(s));
8538            store_reg(s, rd, tmp);
8539        } else {
8540            /* store */
8541            tmp = load_reg(s, rd);
8542            gen_st8(tmp, addr, IS_USER(s));
8543        }
8544        dead_tmp(addr);
8545        break;
8546
8547    case 8:
8548        /* load/store halfword immediate offset */
8549        rd = insn & 7;
8550        rn = (insn >> 3) & 7;
8551        addr = load_reg(s, rn);
8552        val = (insn >> 5) & 0x3e;
8553        tcg_gen_addi_i32(addr, addr, val);
8554
8555        if (insn & (1 << 11)) {
8556            /* load */
8557            tmp = gen_ld16u(addr, IS_USER(s));
8558            store_reg(s, rd, tmp);
8559        } else {
8560            /* store */
8561            tmp = load_reg(s, rd);
8562            gen_st16(tmp, addr, IS_USER(s));
8563        }
8564        dead_tmp(addr);
8565        break;
8566
8567    case 9:
8568        /* load/store from stack */
8569        rd = (insn >> 8) & 7;
8570        addr = load_reg(s, 13);
8571        val = (insn & 0xff) * 4;
8572        tcg_gen_addi_i32(addr, addr, val);
8573
8574        if (insn & (1 << 11)) {
8575            /* load */
8576            tmp = gen_ld32(addr, IS_USER(s));
8577            store_reg(s, rd, tmp);
8578        } else {
8579            /* store */
8580            tmp = load_reg(s, rd);
8581            gen_st32(tmp, addr, IS_USER(s));
8582        }
8583        dead_tmp(addr);
8584        break;
8585
8586    case 10:
8587        /* add to high reg */
8588        rd = (insn >> 8) & 7;
8589        if (insn & (1 << 11)) {
8590            /* SP */
8591            tmp = load_reg(s, 13);
8592        } else {
8593            /* PC. bit 1 is ignored.  */
8594            tmp = new_tmp();
8595            tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
8596        }
8597        val = (insn & 0xff) * 4;
8598        tcg_gen_addi_i32(tmp, tmp, val);
8599        store_reg(s, rd, tmp);
8600        break;
8601
8602    case 11:
8603        /* misc */
8604        op = (insn >> 8) & 0xf;
8605        switch (op) {
8606        case 0:
8607            /* adjust stack pointer */
8608            tmp = load_reg(s, 13);
8609            val = (insn & 0x7f) * 4;
8610            if (insn & (1 << 7))
8611                val = -(int32_t)val;
8612            tcg_gen_addi_i32(tmp, tmp, val);
8613            store_reg(s, 13, tmp);
8614            break;
8615
8616        case 2: /* sign/zero extend.  */
8617            ARCH(6);
8618            rd = insn & 7;
8619            rm = (insn >> 3) & 7;
8620            tmp = load_reg(s, rm);
8621            switch ((insn >> 6) & 3) {
8622            case 0: gen_sxth(tmp); break;
8623            case 1: gen_sxtb(tmp); break;
8624            case 2: gen_uxth(tmp); break;
8625            case 3: gen_uxtb(tmp); break;
8626            }
8627            store_reg(s, rd, tmp);
8628            break;
8629        case 4: case 5: case 0xc: case 0xd:
8630            /* push/pop */
8631            addr = load_reg(s, 13);
8632            if (insn & (1 << 8))
8633                offset = 4;
8634            else
8635                offset = 0;
8636            for (i = 0; i < 8; i++) {
8637                if (insn & (1 << i))
8638                    offset += 4;
8639            }
8640            if ((insn & (1 << 11)) == 0) {
8641                tcg_gen_addi_i32(addr, addr, -offset);
8642            }
8643            for (i = 0; i < 8; i++) {
8644                if (insn & (1 << i)) {
8645                    if (insn & (1 << 11)) {
8646                        /* pop */
8647                        tmp = gen_ld32(addr, IS_USER(s));
8648                        store_reg(s, i, tmp);
8649                    } else {
8650                        /* push */
8651                        tmp = load_reg(s, i);
8652                        gen_st32(tmp, addr, IS_USER(s));
8653                    }
8654                    /* advance to the next address.  */
8655                    tcg_gen_addi_i32(addr, addr, 4);
8656                }
8657            }
8658            TCGV_UNUSED(tmp);
8659            if (insn & (1 << 8)) {
8660                if (insn & (1 << 11)) {
8661                    /* pop pc */
8662                    tmp = gen_ld32(addr, IS_USER(s));
8663                    /* don't set the pc until the rest of the instruction
8664                       has completed */
8665                } else {
8666                    /* push lr */
8667                    tmp = load_reg(s, 14);
8668                    gen_st32(tmp, addr, IS_USER(s));
8669                }
8670                tcg_gen_addi_i32(addr, addr, 4);
8671            }
8672            if ((insn & (1 << 11)) == 0) {
8673                tcg_gen_addi_i32(addr, addr, -offset);
8674            }
8675            /* write back the new stack pointer */
8676            store_reg(s, 13, addr);
8677            /* set the new PC value */
8678            if ((insn & 0x0900) == 0x0900)
8679                gen_bx(s, tmp);
8680            break;
8681
8682        case 1: case 3: case 9: case 11: /* czb */
8683            rm = insn & 7;
8684            tmp = load_reg(s, rm);
8685            s->condlabel = gen_new_label();
8686            s->condjmp = 1;
8687            if (insn & (1 << 11))
8688                tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
8689            else
8690                tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
8691            dead_tmp(tmp);
8692            offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
8693            val = (uint32_t)s->pc + 2;
8694            val += offset;
8695            gen_jmp(s, val);
8696            break;
8697
8698        case 15: /* IT, nop-hint.  */
8699            if ((insn & 0xf) == 0) {
8700                gen_nop_hint(s, (insn >> 4) & 0xf);
8701                break;
8702            }
8703            /* If Then.  */
8704            s->condexec_cond = (insn >> 4) & 0xe;
8705            s->condexec_mask = insn & 0x1f;
8706            /* No actual code generated for this insn, just setup state.  */
8707            break;
8708
8709        case 0xe: /* bkpt */
8710            gen_set_condexec(s);
8711            gen_set_pc_im(s->pc - 2);
8712            gen_exception(EXCP_BKPT);
8713            s->is_jmp = DISAS_JUMP;
8714            break;
8715
8716        case 0xa: /* rev */
8717            ARCH(6);
8718            rn = (insn >> 3) & 0x7;
8719            rd = insn & 0x7;
8720            tmp = load_reg(s, rn);
8721            switch ((insn >> 6) & 3) {
8722            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
8723            case 1: gen_rev16(tmp); break;
8724            case 3: gen_revsh(tmp); break;
8725            default: goto illegal_op;
8726            }
8727            store_reg(s, rd, tmp);
8728            break;
8729
8730        case 6: /* cps */
8731            ARCH(6);
8732            if (IS_USER(s))
8733                break;
8734            if (IS_M(env)) {
8735                tmp = tcg_const_i32((insn & (1 << 4)) != 0);
8736                /* PRIMASK */
8737                if (insn & 1) {
8738                    addr = tcg_const_i32(16);
8739                    gen_helper_v7m_msr(cpu_env, addr, tmp);
8740                }
8741                /* FAULTMASK */
8742                if (insn & 2) {
8743                    addr = tcg_const_i32(17);
8744                    gen_helper_v7m_msr(cpu_env, addr, tmp);
8745                }
8746                gen_lookup_tb(s);
8747            } else {
8748                if (insn & (1 << 4))
8749                    shift = CPSR_A | CPSR_I | CPSR_F;
8750                else
8751                    shift = 0;
8752
8753                val = ((insn & 7) << 6) & shift;
8754                gen_op_movl_T0_im(val);
8755                gen_set_psr_T0(s, shift, 0);
8756            }
8757            break;
8758
8759        default:
8760            goto undef;
8761        }
8762        break;
8763
8764    case 12:
8765        /* load/store multiple */
8766        rn = (insn >> 8) & 0x7;
8767        addr = load_reg(s, rn);
8768        for (i = 0; i < 8; i++) {
8769            if (insn & (1 << i)) {
8770                if (insn & (1 << 11)) {
8771                    /* load */
8772                    tmp = gen_ld32(addr, IS_USER(s));
8773                    store_reg(s, i, tmp);
8774                } else {
8775                    /* store */
8776                    tmp = load_reg(s, i);
8777                    gen_st32(tmp, addr, IS_USER(s));
8778                }
8779                /* advance to the next address */
8780                tcg_gen_addi_i32(addr, addr, 4);
8781            }
8782        }
8783        /* Base register writeback.  */
8784        if ((insn & (1 << rn)) == 0) {
8785            store_reg(s, rn, addr);
8786        } else {
8787            dead_tmp(addr);
8788        }
8789        break;
8790
8791    case 13:
8792        /* conditional branch or swi */
8793        cond = (insn >> 8) & 0xf;
8794        if (cond == 0xe)
8795            goto undef;
8796
8797        if (cond == 0xf) {
8798            /* swi */
8799            gen_set_condexec(s);
8800            gen_set_pc_im(s->pc);
8801            s->is_jmp = DISAS_SWI;
8802            break;
8803        }
8804        /* generate a conditional jump to next instruction */
8805        s->condlabel = gen_new_label();
8806        gen_test_cc(cond ^ 1, s->condlabel);
8807        s->condjmp = 1;
8808        gen_movl_T1_reg(s, 15);
8809
8810        /* jump to the offset */
8811        val = (uint32_t)s->pc + 2;
8812        offset = ((int32_t)insn << 24) >> 24;
8813        val += offset << 1;
8814        gen_jmp(s, val);
8815        break;
8816
8817    case 14:
8818        if (insn & (1 << 11)) {
8819            if (disas_thumb2_insn(env, s, insn))
8820              goto undef32;
8821            break;
8822        }
8823        /* unconditional branch */
8824        val = (uint32_t)s->pc;
8825        offset = ((int32_t)insn << 21) >> 21;
8826        val += (offset << 1) + 2;
8827        gen_jmp(s, val);
8828        break;
8829
8830    case 15:
8831        if (disas_thumb2_insn(env, s, insn))
8832            goto undef32;
8833        break;
8834    }
8835    return;
8836undef32:
8837    gen_set_condexec(s);
8838    gen_set_pc_im(s->pc - 4);
8839    gen_exception(EXCP_UDEF);
8840    s->is_jmp = DISAS_JUMP;
8841    return;
8842illegal_op:
8843undef:
8844    gen_set_condexec(s);
8845    gen_set_pc_im(s->pc - 2);
8846    gen_exception(EXCP_UDEF);
8847    s->is_jmp = DISAS_JUMP;
8848}
8849
8850/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
8851   basic block 'tb'. If search_pc is TRUE, also generate PC
8852   information for each intermediate instruction. */
static inline void gen_intermediate_code_internal(CPUState *env,
                                                  TranslationBlock *tb,
                                                  int search_pc)
{
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    uint16_t *gen_opc_end;
    int j, lj;
    target_ulong pc_start;
    uint32_t next_page_start;
    int num_insns;
    int max_insns;

    /* generate intermediate code */
    /* Reset the translator's TCG temporary tracking before starting a
       new block; a nonzero count later indicates a temp leak.  */
    num_temps = 0;
    memset(temps, 0, sizeof(temps));

    pc_start = tb->pc;

    dc->tb = tb;

    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = env->singlestep_enabled;
    dc->condjmp = 0;
    dc->thumb = env->thumb;
    /* Unpack the Thumb-2 IT-block state: low 4 bits of condexec_bits are
       the mask (shifted left one here), upper bits the condition.  */
    dc->condexec_mask = (env->condexec_bits & 0xf) << 1;
    dc->condexec_mask_prev = dc->condexec_mask;
    dc->condexec_cond = env->condexec_bits >> 4;
#if !defined(CONFIG_USER_ONLY)
    if (IS_M(env)) {
        /* M profile: unprivileged when running thread mode (no active
           exception) with CONTROL.nPRIV set.  */
        dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1));
    } else {
        dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
    }
#endif
#ifdef CONFIG_MEMCHECK
    dc->search_pc = search_pc;
#endif  // CONFIG_MEMCHECK
    /* Allocate the global scratch temporaries shared by the VFP/Neon
       code generators.  */
    cpu_F0s = tcg_temp_new_i32();
    cpu_F1s = tcg_temp_new_i32();
    cpu_F0d = tcg_temp_new_i64();
    cpu_F1d = tcg_temp_new_i64();
    cpu_V0 = cpu_F0d;
    cpu_V1 = cpu_F1d;
    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
    cpu_M0 = tcg_temp_new_i64();
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0)
        max_insns = CF_COUNT_MASK;

    gen_icount_start();
#ifdef CONFIG_TRACE
    if (tracing) {
        gen_traceBB(trace_static.bb_num, tb);
        trace_bb_start(dc->pc);
    }
#endif

    /* Main translation loop: one guest instruction per iteration.  */
    do {
#ifdef CONFIG_USER_ONLY
        /* Intercept jump to the magic kernel page.  */
        if (dc->pc >= 0xffff0000) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception(EXCP_KERNEL_TRAP);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#else
        if (dc->pc >= 0xfffffff0 && IS_M(env)) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception(EXCP_EXCEPTION_EXIT);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#endif

        /* If a breakpoint is set at this PC, raise EXCP_DEBUG instead of
           translating the instruction.  */
        if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
            QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_set_condexec(dc);
                    gen_set_pc_im(dc->pc);
                    gen_exception(EXCP_DEBUG);
                    dc->is_jmp = DISAS_JUMP;
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    dc->pc += 2;
                    goto done_generating;
                    break;
                }
            }
        }

#ifdef CONFIG_MEMCHECK
        /* When memchecker is enabled, we need to keep a match between
         * translated PC and guest PCs, so memchecker can quickly covert
         * one to another. Note that we do that only for user mode. */
        if (search_pc || (memcheck_enabled && dc->user)) {
#else   // CONFIG_MEMCHECK
        if (search_pc) {
#endif  // CONFIG_MEMCHECK
            /* Record the guest PC / icount for the first TCG op of this
               instruction so the block can be re-searched later.  */
            j = gen_opc_ptr - gen_opc_buf;
            if (lj < j) {
                lj++;
                while (lj < j)
                    gen_opc_instr_start[lj++] = 0;
            }
            gen_opc_pc[lj] = dc->pc;
            gen_opc_instr_start[lj] = 1;
            gen_opc_icount[lj] = num_insns;
        }

        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
            gen_io_start();

        if (env->thumb) {
            disas_thumb_insn(env, dc);
            /* Advance the IT-block state machine: shift the mask and
               fold its top bit into the low bit of the condition.  */
            dc->condexec_mask_prev = dc->condexec_mask;
            if (dc->condexec_mask) {
                dc->condexec_cond = (dc->condexec_cond & 0xe)
                                   | ((dc->condexec_mask >> 4) & 1);
                dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
                if (dc->condexec_mask == 0) {
                    dc->condexec_cond = 0;
                }
            }
        } else {
            disas_arm_insn(env, dc);
        }
        /* Sanity check: every TCG temp allocated while translating an
           instruction must have been freed again.  */
        if (num_temps) {
            fprintf(stderr, "Internal resource leak before %08x (%d temps)\n", dc->pc, num_temps);
            tcg_dump_ops(&tcg_ctx, stderr);
            num_temps = 0;
        }

        /* Close the skip-label of a conditionally executed instruction
           that did not itself end the block.  */
        if (dc->condjmp && !dc->is_jmp) {
            gen_set_label(dc->condlabel);
            dc->condjmp = 0;
        }
        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.  */
        num_insns ++;
    } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
             !env->singlestep_enabled &&
             !singlestep &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

#ifdef CONFIG_TRACE
    if (tracing) {
        trace_bb_end();
    }
#endif

    if (tb->cflags & CF_LAST_IO) {
        if (dc->condjmp) {
            /* FIXME:  This can theoretically happen with self-modifying
               code.  */
            cpu_abort(env, "IO on conditional branch instruction");
        }
        gen_io_end();
    }

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    if (unlikely(env->singlestep_enabled)) {
        /* Make sure the pc is updated, and raise a debug exception.  */
        if (dc->condjmp) {
            gen_set_condexec(dc);
            if (dc->is_jmp == DISAS_SWI) {
                gen_exception(EXCP_SWI);
            } else {
                gen_exception(EXCP_DEBUG);
            }
            gen_set_label(dc->condlabel);
        }
        if (dc->condjmp || !dc->is_jmp) {
            gen_set_pc_im(dc->pc);
            dc->condjmp = 0;
        }
        gen_set_condexec(dc);
        if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
            gen_exception(EXCP_SWI);
        } else {
            /* FIXME: Single stepping a WFI insn will not halt
               the CPU.  */
            gen_exception(EXCP_DEBUG);
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middel of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        gen_set_condexec(dc);
        /* Emit the block epilogue appropriate for how translation ended.  */
        switch(dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_JUMP:
        case DISAS_UPDATE:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
            gen_helper_wfi();
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI);
            break;
        }
        /* Fall-through path for a final conditionally-executed
           instruction: continue at the next PC.  */
        if (dc->condjmp) {
            gen_set_label(dc->condlabel);
            gen_set_condexec(dc);
            gen_goto_tb(dc, 1, dc->pc);
            dc->condjmp = 0;
        }
    }

done_generating:
    gen_icount_end(tb, num_insns);
    *gen_opc_ptr = INDEX_op_end;

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(pc_start, dc->pc - pc_start, env->thumb);
        qemu_log("\n");
    }
#endif
    /* Pad out the remainder of the PC back-map, or record the final TB
       geometry when this was a normal (non-search) translation.  */
    if (search_pc) {
        j = gen_opc_ptr - gen_opc_buf;
        lj++;
        while (lj <= j)
            gen_opc_instr_start[lj++] = 0;
    } else {
#ifdef CONFIG_MEMCHECK
        if (memcheck_enabled && dc->user) {
            j = gen_opc_ptr - gen_opc_buf;
            lj++;
            while (lj <= j)
                gen_opc_instr_start[lj++] = 0;
        }
#endif  // CONFIG_MEMCHECK
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}
9119
/* Translate basic block 'tb' to TCG ops (no PC search information).  */
void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
{
    gen_intermediate_code_internal(env, tb, 0);
}
9124
/* Translate basic block 'tb' and additionally record the guest PC for
   each intermediate op, for later use by gen_pc_load().  */
void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
{
    gen_intermediate_code_internal(env, tb, 1);
}
9129
/* CPSR mode names, indexed by the low four bits of the mode field
   (cpu_dump_state() uses psr & 0xf); "???" marks invalid encodings.  */
static const char *cpu_mode_names[16] = {
  "usr", "fiq", "irq", "svc", "???", "???", "???", "abt",
  "???", "???", "???", "und", "???", "???", "???", "sys"
};
9134
9135void cpu_dump_state(CPUState *env, FILE *f,
9136                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
9137                    int flags)
9138{
9139    int i;
9140#if 0
9141    union {
9142        uint32_t i;
9143        float s;
9144    } s0, s1;
9145    CPU_DoubleU d;
9146    /* ??? This assumes float64 and double have the same layout.
9147       Oh well, it's only debug dumps.  */
9148    union {
9149        float64 f64;
9150        double d;
9151    } d0;
9152#endif
9153    uint32_t psr;
9154
9155    for(i=0;i<16;i++) {
9156        cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
9157        if ((i % 4) == 3)
9158            cpu_fprintf(f, "\n");
9159        else
9160            cpu_fprintf(f, " ");
9161    }
9162    psr = cpsr_read(env);
9163    cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
9164                psr,
9165                psr & (1 << 31) ? 'N' : '-',
9166                psr & (1 << 30) ? 'Z' : '-',
9167                psr & (1 << 29) ? 'C' : '-',
9168                psr & (1 << 28) ? 'V' : '-',
9169                psr & CPSR_T ? 'T' : 'A',
9170                cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
9171
9172#if 0
9173    for (i = 0; i < 16; i++) {
9174        d.d = env->vfp.regs[i];
9175        s0.i = d.l.lower;
9176        s1.i = d.l.upper;
9177        d0.f64 = d.d;
9178        cpu_fprintf(f, "s%02d=%08x(%8g) s%02d=%08x(%8g) d%02d=%08x%08x(%8g)\n",
9179                    i * 2, (int)s0.i, s0.s,
9180                    i * 2 + 1, (int)s1.i, s1.s,
9181                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
9182                    d0.d);
9183    }
9184    cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
9185#endif
9186}
9187
/* Restore the guest PC (R15) from the per-op back-map built by
   gen_intermediate_code_pc(); 'pc_pos' indexes the op whose guest
   instruction was executing.  'searched_pc' and 'puc' are unused.  */
void gen_pc_load(CPUState *env, TranslationBlock *tb,
                unsigned long searched_pc, int pc_pos, void *puc)
{
    env->regs[15] = gen_opc_pc[pc_pos];
}
9193