LowerAlu.cpp revision 0c2dc522d0e120f346cf0a40c8cf0c93346131c2
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18/*! \file LowerAlu.cpp
19    \brief This file lowers ALU bytecodes.
20*/
21#include "libdex/DexOpcodes.h"
22#include "libdex/DexFile.h"
23#include "Lower.h"
24#include "NcgAot.h"
25#include "enc_wrapper.h"
26
27/////////////////////////////////////////////
28#define P_GPR_1 PhysicalReg_EBX
29//! lower bytecode NEG_INT
30
31//!
32int op_neg_int() {
33    u2 vA = INST_A(inst); //destination
34    u2 vB = INST_B(inst);
35    get_virtual_reg(vB, OpndSize_32, 1, false);
36    alu_unary_reg(OpndSize_32, neg_opc, 1, false);
37    set_virtual_reg(vA, OpndSize_32, 1, false);
38    rPC += 1;
39    return 0;
40}
41//! lower bytecode NOT_INT
42
43//!
44int op_not_int() {
45    u2 vA = INST_A(inst); //destination
46    u2 vB = INST_B(inst);
47    get_virtual_reg(vB, OpndSize_32, 1, false);
48    alu_unary_reg(OpndSize_32, not_opc, 1, false);
49    set_virtual_reg(vA, OpndSize_32, 1, false);
50    rPC += 1;
51    return 0;
52}
53#undef P_GPR_1
54//! lower bytecode NEG_LONG
55
56//! This implementation uses XMM registers
57int op_neg_long() {
58    u2 vA = INST_A(inst); //destination
59    u2 vB = INST_B(inst);
60    get_virtual_reg(vB, OpndSize_64, 1, false);
61    alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false);
62    alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false);
63    set_virtual_reg(vA, OpndSize_64, 2, false);
64    rPC += 1;
65    return 0;
66}
67//! lower bytecode NOT_LONG
68
69//! This implementation uses XMM registers
70int op_not_long() {
71    u2 vA = INST_A(inst); //destination
72    u2 vB = INST_B(inst);
73    get_virtual_reg(vB, OpndSize_64, 1, false);
74    load_global_data_API("64bits", OpndSize_64, 2, false);
75    alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false);
76    set_virtual_reg(vA, OpndSize_64, 1, false);
77    rPC += 1;
78    return 0;
79}
80#define P_GPR_1 PhysicalReg_EBX
81//! lower bytecode NEG_FLOAT
82
83//! This implementation uses GPR
84int op_neg_float() {
85    u2 vA = INST_A(inst); //destination
86    u2 vB = INST_B(inst);
87    get_virtual_reg(vB, OpndSize_32, 1, false);
88    alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false);
89    set_virtual_reg(vA, OpndSize_32, 1, false);
90    rPC += 1;
91    return 0;
92}
93#undef P_GPR_1
94
95//! lower bytecode NEG_DOUBLE
96
97//! This implementation uses XMM registers
98int op_neg_double() {
99    u2 vA = INST_A(inst); //destination
100    u2 vB = INST_B(inst);
101    get_virtual_reg(vB, OpndSize_64, 1, false);
102    load_global_data_API("doubNeg", OpndSize_64, 2, false);
103    alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false);
104    set_virtual_reg(vA, OpndSize_64, 2, false);
105    rPC += 1;
106    return 0;
107}
108
109//! lower bytecode INT_TO_LONG
110
111//! It uses native instruction cdq
112int op_int_to_long() {
113    u2 vA = INST_A(inst); //destination
114    u2 vB = INST_B(inst);
115    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
116    convert_integer(OpndSize_32, OpndSize_64);
117    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
118    set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true);
119    rPC += 1;
120    return 0;
121}
122//! lower bytecode INT_TO_FLOAT
123
124//! This implementation uses FP stack
125int op_int_to_float() {
126    u2 vA = INST_A(inst); //destination
127    u2 vB = INST_B(inst);
128    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
129    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
130    rPC += 1;
131    return 0;
132}
133//! lower bytecode INT_TO_DOUBLE
134
135//! This implementation uses FP stack
136int op_int_to_double() {
137    u2 vA = INST_A(inst); //destination
138    u2 vB = INST_B(inst);
139    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
140    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
141    rPC += 1;
142    return 0;
143}
144//! lower bytecode LONG_TO_FLOAT
145
146//! This implementation uses FP stack
147int op_long_to_float() {
148    u2 vA = INST_A(inst); //destination
149    u2 vB = INST_B(inst);
150    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
151    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
152    rPC += 1;
153    return 0;
154}
155//! lower bytecode LONG_TO_DOUBLE
156
157//! This implementation uses FP stack
158int op_long_to_double() {
159    u2 vA = INST_A(inst); //destination
160    u2 vB = INST_B(inst);
161    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
162    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
163    rPC += 1;
164    return 0;
165}
166//! lower bytecode FLOAT_TO_DOUBLE
167
168//! This implementation uses FP stack
169int op_float_to_double() {
170    u2 vA = INST_A(inst); //destination
171    u2 vB = INST_B(inst);
172    load_fp_stack_VR(OpndSize_32, vB); //flds
173    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
174    rPC += 1;
175    return 0;
176}
177//! lower bytecode DOUBLE_TO_FLOAT
178
179//! This implementation uses FP stack
180int op_double_to_float() {
181    u2 vA = INST_A(inst); //destination
182    u2 vB = INST_B(inst);
183    load_fp_stack_VR(OpndSize_64, vB); //fldl
184    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
185    rPC += 1;
186    return 0;
187}
188#define P_GPR_1 PhysicalReg_EBX
189//! lower bytecode LONG_TO_INT
190
191//! This implementation uses GPR
192int op_long_to_int() {
193    u2 vA = INST_A(inst); //destination
194    u2 vB = INST_B(inst);
195    get_virtual_reg(vB, OpndSize_32, 1, false);
196    set_virtual_reg(vA, OpndSize_32, 1, false);
197    rPC += 1;
198    return 0;
199}
200#undef P_GPR_1
201
202//! common code to convert a float or double to integer
203
204//! It uses FP stack
int common_fp_to_int(bool isDouble, u2 vA, u2 vB) {
    //push the source VR onto the x87 FP stack
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    //push the int32 saturation bounds; after these two loads:
    //ST(0)=intMin, ST(1)=intMax, ST(2)=source value
    load_fp_stack_global_data_API("intMax", OpndSize_32);
    load_fp_stack_global_data_API("intMin", OpndSize_32);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2)
    //ST(0) ST(1) --> LintMax value
    //negative out-of-range: the "_negInf" block below stores 0x80000000
    conditional_jump(Condition_AE, ".float_to_int_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1)
    //ST(0) --> value
    rememberState(2);
    //carry set: value above intMax, or the compare was unordered (NaN)
    conditional_jump(Condition_C, ".float_to_int_nanInf", true);
    //in-range case: set the FPU rounding-control bits (0xc00) to truncate,
    //convert with fistpl, then restore the saved control word
    //fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl
    //fldcw: restore the original control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_nanInf", true);
    //parity clear means the compare was ordered: plain overflow, not NaN
    conditional_jump(Condition_NP, ".float_to_int_posInf", true);
    //NaN: pop the value off the FP stack, then store 0
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_posInf", true);
    //positive overflow: pop the value, then store intMax
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x7fffffff);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_negInf", true);
    goToState(1);
    //two entries (intMax and the value) remain on the FP stack on this
    //path, so pop twice before storing intMin
    //fstps CHECK
    store_fp_stack_VR(true, OpndSize_32, vA);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(3);
    insertLabel(".float_to_int_okay", true); //merge point
    return 0;
}
262//! lower bytecode FLOAT_TO_INT by calling common_fp_to_int
263
264//!
265int op_float_to_int() {
266    u2 vA = INST_A(inst); //destination
267    u2 vB = INST_B(inst);
268    int retval = common_fp_to_int(false, vA, vB);
269    rPC += 1;
270    return retval;
271}
272//! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int
273
274//!
275int op_double_to_int() {
276    u2 vA = INST_A(inst); //destination
277    u2 vB = INST_B(inst);
278    int retval = common_fp_to_int(true, vA, vB);
279    rPC += 1;
280    return retval;
281}
282
283//! common code to convert float or double to long
284
285//! It uses FP stack
int common_fp_to_long(bool isDouble, u2 vA, u2 vB) {
    //push the source VR onto the x87 FP stack
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    //push the int64 saturation bound constants; after these two loads:
    //ST(0)=valueNegInfLong, ST(1)=valuePosInfLong, ST(2)=source value
    load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64);
    load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2)
    //ST(0) ST(1) --> LintMax value
    //negative out-of-range: the "_negInf" block stores valueNegInfLong
    conditional_jump(Condition_AE, ".float_to_long_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1)
    rememberState(2);
    //ST(0) --> value
    //carry set: value above the max, or the compare was unordered (NaN)
    conditional_jump(Condition_C, ".float_to_long_nanInf", true);
    //in-range case: set the FPU rounding-control bits (0xc00) to truncate,
    //convert with fistpll, then restore the saved control word
    //fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll
    //fldcw: restore the original control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_nanInf", true);
    //parity clear means the compare was ordered: overflow, not NaN
    conditional_jump(Condition_NP, ".float_to_long_posInf", true);
    //NaN: store the "valueNanLong" constant
    //NOTE(review): unlike common_fp_to_int, these out-of-range paths never
    //pop the remaining FP-stack entries (the original "fstpl??" comments
    //flag the same doubt) — confirm the x87 stack is reclaimed elsewhere
    //fstpl??
    goToState(2);

    load_global_data_API("valueNanLong", OpndSize_64, 1, false);

    set_virtual_reg(vA, OpndSize_64, 1, false);
    transferToState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_posInf", true);
    //positive overflow: store the "valuePosInfLong" constant
    //fstpl
    goToState(2);

    load_global_data_API("valuePosInfLong", OpndSize_64, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    transferToState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_negInf", true);
    //negative overflow: store the "valueNegInfLong" constant
    //fstpl
    //fstpl
    goToState(1);

    load_global_data_API("valueNegInfLong", OpndSize_64, 3, false);
    set_virtual_reg(vA, OpndSize_64, 3, false);
    transferToState(3);
    insertLabel(".float_to_long_okay", true); //merge point
    return 0;
}
347//! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long
348
349//!
350int op_float_to_long() {
351    u2 vA = INST_A(inst); //destination
352    u2 vB = INST_B(inst);
353    int retval = common_fp_to_long(false, vA, vB);
354    rPC += 1;
355    return retval;
356}
357//! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long
358
359//!
360int op_double_to_long() {
361    u2 vA = INST_A(inst); //destination
362    u2 vB = INST_B(inst);
363    int retval = common_fp_to_long(true, vA, vB);
364    rPC += 1;
365    return retval;
366}
367#define P_GPR_1 PhysicalReg_EBX
368//! lower bytecode INT_TO_BYTE
369
370//! It uses GPR
371int op_int_to_byte() {
372    u2 vA = INST_A(inst); //destination
373    u2 vB = INST_B(inst);
374    get_virtual_reg(vB, OpndSize_32, 1, false);
375    alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false);
376    alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false);
377    set_virtual_reg(vA, OpndSize_32, 1, false);
378    rPC += 1;
379    return 0;
380}
381//! lower bytecode INT_TO_CHAR
382
383//! It uses GPR
384int op_int_to_char() {
385    u2 vA = INST_A(inst); //destination
386    u2 vB = INST_B(inst);
387    get_virtual_reg(vB, OpndSize_32, 1, false);
388    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
389    alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false);
390    set_virtual_reg(vA, OpndSize_32, 1, false);
391    rPC += 1;
392    return 0;
393}
394//! lower bytecode INT_TO_SHORT
395
396//! It uses GPR
397int op_int_to_short() {
398    u2 vA = INST_A(inst); //destination
399    u2 vB = INST_B(inst);
400    get_virtual_reg(vB, OpndSize_32, 1, false);
401    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
402    alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false);
403    set_virtual_reg(vA, OpndSize_32, 1, false);
404    rPC += 1;
405    return 0;
406}
407//! common code to handle integer ALU ops
408
409//! It uses GPR
int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    //vA = v1 opc v2, computed in one scratch GPR
    get_virtual_reg(v1, OpndSize_32, 1, false);
    //in encoder, reg is first operand, which is the destination
    //gpr_1 op v2(rFP) --> gpr_1
    //shift only works with reg cl, v2 should be in %ecx (see common_shift_int)
    alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
419#undef P_GPR_1
420#define P_GPR_1 PhysicalReg_EBX
421//! common code to handle integer shift ops
422
423//! It uses GPR
424int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {
425    get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true);
426    get_virtual_reg(v1, OpndSize_32, 1, false);
427    //in encoder, reg2 is first operand, which is the destination
428    //gpr_1 op v2(rFP) --> gpr_1
429    //shift only works with reg cl, v2 should be in %ecx
430    alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false);
431    set_virtual_reg(vA, OpndSize_32, 1, false);
432    return 0;
433}
434#undef p_GPR_1
435//! lower bytecode ADD_INT by calling common_alu_int
436
437//!
438int op_add_int() {
439    u2 vA, v1, v2;
440    vA = INST_AA(inst);
441    v1 = *((u1*)rPC + 2);
442    v2 = *((u1*)rPC + 3);
443    int retval = common_alu_int(add_opc, vA, v1, v2);
444    rPC += 2;
445    return retval;
446}
447//! lower bytecode SUB_INT by calling common_alu_int
448
449//!
450int op_sub_int() {
451    u2 vA, v1, v2;
452    vA = INST_AA(inst);
453    v1 = *((u1*)rPC + 2);
454    v2 = *((u1*)rPC + 3);
455    int retval = common_alu_int(sub_opc, vA, v1, v2);
456    rPC += 2;
457    return retval;
458}
459//! lower bytecode MUL_INT by calling common_alu_int
460
461//!
462int op_mul_int() {
463    u2 vA, v1, v2;
464    vA = INST_AA(inst);
465    v1 = *((u1*)rPC + 2);
466    v2 = *((u1*)rPC + 3);
467    int retval = common_alu_int(imul_opc, vA, v1, v2);
468    rPC += 2;
469    return retval;
470}
471//! lower bytecode AND_INT by calling common_alu_int
472
473//!
474int op_and_int() {
475    u2 vA, v1, v2;
476    vA = INST_AA(inst);
477    v1 = *((u1*)rPC + 2);
478    v2 = *((u1*)rPC + 3);
479    int retval = common_alu_int(and_opc, vA, v1, v2);
480    rPC += 2;
481    return retval;
482}
483//! lower bytecode OR_INT by calling common_alu_int
484
485//!
486int op_or_int() {
487    u2 vA, v1, v2;
488    vA = INST_AA(inst);
489    v1 = *((u1*)rPC + 2);
490    v2 = *((u1*)rPC + 3);
491    int retval = common_alu_int(or_opc, vA, v1, v2);
492    rPC += 2;
493    return retval;
494}
495//! lower bytecode XOR_INT by calling common_alu_int
496
497//!
498int op_xor_int() {
499    u2 vA, v1, v2;
500    vA = INST_AA(inst);
501    v1 = *((u1*)rPC + 2);
502    v2 = *((u1*)rPC + 3);
503    int retval = common_alu_int(xor_opc, vA, v1, v2);
504    rPC += 2;
505    return retval;
506}
507//! lower bytecode SHL_INT by calling common_shift_int
508
509//!
510int op_shl_int() {
511    u2 vA, v1, v2;
512    vA = INST_AA(inst);
513    v1 = *((u1*)rPC + 2);
514    v2 = *((u1*)rPC + 3);
515    int retval = common_shift_int(shl_opc, vA, v1, v2);
516    rPC += 2;
517    return retval;
518}
519//! lower bytecode SHR_INT by calling common_shift_int
520
521//!
522int op_shr_int() {
523    u2 vA, v1, v2;
524    vA = INST_AA(inst);
525    v1 = *((u1*)rPC + 2);
526    v2 = *((u1*)rPC + 3);
527    int retval = common_shift_int(sar_opc, vA, v1, v2);
528    rPC += 2;
529    return retval;
530}
531//! lower bytecode USHR_INT by calling common_shift_int
532
533//!
534int op_ushr_int() {
535    u2 vA, v1, v2;
536    vA = INST_AA(inst);
537    v1 = *((u1*)rPC + 2);
538    v2 = *((u1*)rPC + 3);
539    int retval = common_shift_int(shr_opc, vA, v1, v2);
540    rPC += 2;
541    return retval;
542}
543//! lower bytecode ADD_INT_2ADDR by calling common_alu_int
544
545//!
546int op_add_int_2addr() {
547    u2 vA, v1, v2;
548    vA = INST_A(inst);
549    v1 = vA;
550    v2 = INST_B(inst);
551    int retval = common_alu_int(add_opc, vA, v1, v2);
552    rPC += 1;
553    return retval;
554}
555//! lower bytecode SUB_INT_2ADDR by calling common_alu_int
556
557//!
558int op_sub_int_2addr() {
559    u2 vA, v1, v2;
560    vA = INST_A(inst);
561    v1 = vA;
562    v2 = INST_B(inst);
563    int retval = common_alu_int(sub_opc, vA, v1, v2);
564    rPC += 1;
565    return retval;
566}
567//! lower bytecode MUL_INT_2ADDR by calling common_alu_int
568
569//!
570int op_mul_int_2addr() {
571    u2 vA, v1, v2;
572    vA = INST_A(inst);
573    v1 = vA;
574    v2 = INST_B(inst);
575    int retval = common_alu_int(imul_opc, vA, v1, v2);
576    rPC += 1;
577    return retval;
578}
579//! lower bytecode AND_INT_2ADDR by calling common_alu_int
580
581//!
582int op_and_int_2addr() {
583    u2 vA, v1, v2;
584    vA = INST_A(inst);
585    v1 = vA;
586    v2 = INST_B(inst);
587    int retval = common_alu_int(and_opc, vA, v1, v2);
588    rPC += 1;
589    return retval;
590}
591//! lower bytecode OR_INT_2ADDR by calling common_alu_int
592
593//!
594int op_or_int_2addr() {
595    u2 vA, v1, v2;
596    vA = INST_A(inst);
597    v1 = vA;
598    v2 = INST_B(inst);
599    int retval = common_alu_int(or_opc, vA, v1, v2);
600    rPC += 1;
601    return retval;
602}
603//! lower bytecode XOR_INT_2ADDR by calling common_alu_int
604
605//!
606int op_xor_int_2addr() {
607    u2 vA, v1, v2;
608    vA = INST_A(inst);
609    v1 = vA;
610    v2 = INST_B(inst);
611    int retval = common_alu_int(xor_opc, vA, v1, v2);
612    rPC += 1;
613    return retval;
614}
615//! lower bytecode SHL_INT_2ADDR by calling common_shift_int
616
617//!
618int op_shl_int_2addr() {
619    u2 vA, v1, v2;
620    vA = INST_A(inst);
621    v1 = vA;
622    v2 = INST_B(inst);
623    int retval = common_shift_int(shl_opc, vA, v1, v2);
624    rPC += 1;
625    return retval;
626}
627//! lower bytecode SHR_INT_2ADDR by calling common_shift_int
628
629//!
630int op_shr_int_2addr() {
631    u2 vA, v1, v2;
632    vA = INST_A(inst);
633    v1 = vA;
634    v2 = INST_B(inst);
635    int retval = common_shift_int(sar_opc, vA, v1, v2);
636    rPC += 1;
637    return retval;
638}
639//! lower bytecode USHR_INT_2ADDR by calling common_shift_int
640
641//!
642int op_ushr_int_2addr() {
643    u2 vA, v1, v2;
644    vA = INST_A(inst);
645    v1 = vA;
646    v2 = INST_B(inst);
647    int retval = common_shift_int(shr_opc, vA, v1, v2);
648    rPC += 1;
649    return retval;
650}
651#define P_GPR_1 PhysicalReg_EBX
652//!common code to handle integer DIV & REM, it used GPR
653
654//!The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv
655//!There are two merge points in the control flow for this bytecode
656//!make sure the reg. alloc. state is the same at merge points by calling transferToState
int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) {
    //idiv takes its dividend in edx:eax, so v1 goes to EAX; divisor to tmp2
    get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    //a zero divisor throws: branch to the shared divide-by-zero handler
    compare_imm_reg(OpndSize_32, 0, 2, false);
    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    /////////////////// handle special cases
    //conditional move 0 to $edx for rem for the two special cases
    //conditional move 0x80000000 to $eax for div
    //handle -1 special case divide error
    //x86 idiv raises #DE on MININT / -1, so that pair is filtered out here
    compare_imm_reg(OpndSize_32, -1, 2, false);
    conditional_jump(Condition_NE, ".common_div_rem_int_normal", true);
    //handle min int special case divide error
    rememberState(1);
    compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
    transferToState(1);
    conditional_jump(Condition_E, ".common_div_rem_int_special", true);

    insertLabel(".common_div_rem_int_normal", true); //merge point
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else //divide: quotient in %eax
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    rememberState(2);
    unconditional_jump(".common_div_rem_int_okay", true);

    insertLabel(".common_div_rem_int_special", true);
    goToState(1);
    //MININT / -1: remainder is 0, quotient is MININT
    if(isRem)
        set_VR_to_imm(vA, OpndSize_32, 0);
    else
        set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(2);
    insertLabel(".common_div_rem_int_okay", true); //merge point 2
    return 0;
}
697#undef P_GPR_1
698//! lower bytecode DIV_INT by calling common_div_rem_int
699
700//!
701int op_div_int() {
702    u2 vA, v1, v2;
703    vA = INST_AA(inst);
704    v1 = *((u1*)rPC + 2);
705    v2 = *((u1*)rPC + 3);
706    int retval = common_div_rem_int(false, vA, v1, v2);
707    rPC += 2;
708    return retval;
709}
710//! lower bytecode REM_INT by calling common_div_rem_int
711
712//!
713int op_rem_int() {
714    u2 vA, v1, v2;
715    vA = INST_AA(inst);
716    v1 = *((u1*)rPC + 2);
717    v2 = *((u1*)rPC + 3);
718    int retval = common_div_rem_int(true, vA, v1, v2);
719    rPC += 2;
720    return retval;
721}
722//! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int
723
724//!
725int op_div_int_2addr() {
726    u2 vA = INST_A(inst);
727    u2 v1 = vA;
728    u2 v2 = INST_B(inst);
729    int retval = common_div_rem_int(false, vA, v1, v2);
730    rPC += 1;
731    return retval;
732}
733//! lower bytecode REM_INT_2ADDR by calling common_div_rem_int
734
735//!
736int op_rem_int_2addr() {
737    u2 vA = INST_A(inst);
738    u2 v1 = vA;
739    u2 v2 = INST_B(inst);
740    int retval = common_div_rem_int(true, vA, v1, v2);
741    rPC += 1;
742    return retval;
743}
744
745#define P_GPR_1 PhysicalReg_EBX
746//! common code to handle integer ALU ops with literal
747
748//! It uses GPR
int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem
    //vA = vB opc imm, computed in one scratch GPR
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
755//! calls common_alu_int_lit
756int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) {
757    return common_alu_int_lit(opc, vA, vB, imm);
758}
759#undef p_GPR_1
760//! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit
761
762//!
763int op_add_int_lit16() {
764    u2 vA = INST_A(inst);
765    u2 vB = INST_B(inst);
766    s4 tmp = (s2)FETCH(1);
767    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
768    rPC += 2;
769    return retval;
770}
771
772int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) {
773    move_imm_to_reg(OpndSize_32, imm, 2, false);
774    get_virtual_reg(vB, OpndSize_32, 1, false);
775    alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false);
776    set_virtual_reg(vA, OpndSize_32, 2, false);
777    return 0;
778}
779
780
781//! lower bytecode RSUB_INT by calling common_alu_int_lit
782
783//!
784int op_rsub_int() {
785    u2 vA = INST_A(inst);
786    u2 vB = INST_B(inst);
787    s4 tmp = (s2)FETCH(1);
788    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
789    rPC += 2;
790    return retval;
791}
792//! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit
793
794//!
795int op_mul_int_lit16() {
796    u2 vA = INST_A(inst);
797    u2 vB = INST_B(inst);
798    s4 tmp = (s2)FETCH(1);
799    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
800    rPC += 2;
801    return retval;
802}
803//! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit
804
805//!
806int op_and_int_lit16() {
807    u2 vA = INST_A(inst);
808    u2 vB = INST_B(inst);
809    s4 tmp = (s2)FETCH(1);
810    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
811    rPC += 2;
812    return retval;
813}
814//! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit
815
816//!
817int op_or_int_lit16() {
818    u2 vA = INST_A(inst);
819    u2 vB = INST_B(inst);
820    s4 tmp = (s2)FETCH(1);
821    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
822    rPC += 2;
823    return retval;
824}
825//! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit
826
827//!
828int op_xor_int_lit16() {
829    u2 vA = INST_A(inst);
830    u2 vB = INST_B(inst);
831    s4 tmp = (s2)FETCH(1);
832    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
833    rPC += 2;
834    return retval;
835}
836//! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit
837
838//!
839int op_shl_int_lit16() {
840    u2 vA = INST_A(inst);
841    u2 vB = INST_B(inst);
842    s4 tmp = (s2)FETCH(1);
843    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
844    rPC += 2;
845    return retval;
846}
847//! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit
848
849//!
850int op_shr_int_lit16() {
851    u2 vA = INST_A(inst);
852    u2 vB = INST_B(inst);
853    s4 tmp = (s2)FETCH(1);
854    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
855    rPC += 2;
856    return retval;
857}
858//! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit
859
860//!
861int op_ushr_int_lit16() {
862    u2 vA = INST_A(inst);
863    u2 vB = INST_B(inst);
864    s4 tmp = (s2)FETCH(1);
865    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
866    rPC += 2;
867    return retval;
868}
869//! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit
870
871//!
872int op_add_int_lit8() {
873    u2 vA = INST_AA(inst);
874    u2 vB = (u2)FETCH(1) & 0xff;
875    s2 tmp = (s2)FETCH(1) >> 8;
876    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
877    rPC += 2;
878    return retval;
879}
880//! lower bytecode RSUB_INT_LIT8 by calling common_alu_int_lit
881
882//!
883int op_rsub_int_lit8() {
884    u2 vA = INST_AA(inst);
885    u2 vB = (u2)FETCH(1) & 0xff;
886    s2 tmp = (s2)FETCH(1) >> 8;
887    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
888    rPC += 2;
889    return retval;
890}
891//! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit
892
893//!
894int op_mul_int_lit8() {
895    u2 vA = INST_AA(inst);
896    u2 vB = (u2)FETCH(1) & 0xff;
897    s2 tmp = (s2)FETCH(1) >> 8;
898    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
899    rPC += 2;
900    return retval;
901}
902//! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit
903
904//!
905int op_and_int_lit8() {
906    u2 vA = INST_AA(inst);
907    u2 vB = (u2)FETCH(1) & 0xff;
908    s2 tmp = (s2)FETCH(1) >> 8;
909    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
910    rPC += 2;
911    return retval;
912}
913//! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit
914
915//!
916int op_or_int_lit8() {
917    u2 vA = INST_AA(inst);
918    u2 vB = (u2)FETCH(1) & 0xff;
919    s2 tmp = (s2)FETCH(1) >> 8;
920    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
921    rPC += 2;
922    return retval;
923}
924//! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit
925
926//!
927int op_xor_int_lit8() {
928    u2 vA = INST_AA(inst);
929    u2 vB = (u2)FETCH(1) & 0xff;
930    s2 tmp = (s2)FETCH(1) >> 8;
931    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
932    rPC += 2;
933    return retval;
934}
935//! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit
936
937//!
938int op_shl_int_lit8() {
939    u2 vA = INST_AA(inst);
940    u2 vB = (u2)FETCH(1) & 0xff;
941    s2 tmp = (s2)FETCH(1) >> 8;
942    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
943    rPC += 2;
944    return retval;
945}
946//! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit
947
948//!
949int op_shr_int_lit8() {
950    u2 vA = INST_AA(inst);
951    u2 vB = (u2)FETCH(1) & 0xff;
952    s2 tmp = (s2)FETCH(1) >> 8;
953    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
954    rPC += 2;
955    return retval;
956}
957//! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit
958
959//!
960int op_ushr_int_lit8() {
961    u2 vA = INST_AA(inst);
962    u2 vB = (u2)FETCH(1) & 0xff;
963    s2 tmp = (s2)FETCH(1) >> 8;
964    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
965    rPC += 2;
966    return retval;
967}
968
//! Returns the exponent when imm is a power of two in [2, 65536], else -1.
//! Exponent 0 (imm == 1) is not reported; callers treat any result < 1 as
//! "no strength reduction possible" (see div_lit_strength_reduction).
int isPowerOfTwo(int imm) {
    int shift;
    for(shift = 1; shift < 17; shift++) {
        if((1 << shift) == imm) return shift;
    }
    return -1;
}
976
977#define P_GPR_1 PhysicalReg_EBX
int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) {
    //returns 1 if shift-based code was emitted (power-of-two divisor under
    //the NcgO1 execution mode), 0 if the caller must emit a real divide
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //strength reduction for div by 2,4,8,...
        int power = isPowerOfTwo(imm);
        if(power < 1) return 0;
        //tmp2 is not updated, so it can share with vB
        get_virtual_reg(vB, OpndSize_32, 2, false);
        //if imm is 2, power will be 1
        if(power == 1) {
            /* mov tmp1, tmp2
               shrl $31, tmp1
               addl tmp2, tmp1
               sarl $1, tmp1 */
            //adds the sign bit to the dividend before the arithmetic shift so
            //negative values round toward zero, not toward negative infinity
            move_reg_to_reg(OpndSize_32, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false);
            alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false);
            set_virtual_reg(vA, OpndSize_32, 1, false);
            return 1;
        }
        //power > 1
        /* mov tmp1, tmp2
           sarl $power-1, tmp1
           shrl 32-$power, tmp1
           addl tmp2, tmp1
           sarl $power, tmp1 */
        //the sar/shr pair materializes the bias (2^power - 1) for negative
        //dividends (and 0 for non-negative ones); adding it before the final
        //arithmetic shift yields round-toward-zero division
        move_reg_to_reg(OpndSize_32, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false);
        alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false);
        alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false);
        set_virtual_reg(vA, OpndSize_32, 1, false);
        return 1;
    }
    return 0;
}
1014
1015////////// throws exception!!!
1016//! common code to handle integer DIV & REM with literal
1017
1018//! It uses GPR
int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) {
    //try shift-based strength reduction first (division only)
    if(!isRem) {
        int retCode = div_lit_strength_reduction(vA, vB, imm);
        if(retCode > 0) return 0;
    }
    //a literal zero divisor always throws: emit an unconditional throw
    if(imm == 0) {
        export_pc(); //use %edx
#ifdef DEBUG_EXCEPTION
        LOGI("EXTRA code to handle exception");
#endif
        constVREndOfBB();
        beforeCall("exception"); //dump GG, GL VRs
        unconditional_jump_global_API(
                          "common_errDivideByZero", false);

        return 0;
    }
    //dividend goes to EAX as required by idiv
    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    //check against -1 for DIV_INT??
    //x86 idiv raises #DE on MININT / -1, so filter out a MININT dividend
    if(imm == -1) {
        compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
        conditional_jump(Condition_E, ".div_rem_int_lit_special", true);
        rememberState(1);
    }
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);

    if(imm == -1) {
        unconditional_jump(".div_rem_int_lit_okay", true);
        rememberState(2);
        insertLabel(".div_rem_int_lit_special", true);
        goToState(1);
        //MININT / -1: remainder is 0, quotient is MININT
        if(isRem)
            set_VR_to_imm(vA, OpndSize_32, 0);
        else
            set_VR_to_imm(vA, OpndSize_32, 0x80000000);
        transferToState(2);
    }

    insertLabel(".div_rem_int_lit_okay", true); //merge point 2
    return 0;
}
1067#undef P_GPR_1
1068//! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit
1069
1070//!
1071int op_div_int_lit16() {
1072    u2 vA = INST_A(inst);
1073    u2 vB = INST_B(inst);
1074    s4 tmp = (s2)FETCH(1);
1075    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
1076    rPC += 2;
1077    return retval;
1078}
1079//! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit
1080
1081//!
1082int op_rem_int_lit16() {
1083    u2 vA = INST_A(inst);
1084    u2 vB = INST_B(inst);
1085    s4 tmp = (s2)FETCH(1);
1086    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
1087    rPC += 2;
1088    return retval;
1089}
1090//! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit
1091
1092//!
1093int op_div_int_lit8() {
1094    u2 vA = INST_AA(inst);
1095    u2 vB = (u2)FETCH(1) & 0xff;
1096    s2 tmp = (s2)FETCH(1) >> 8;
1097    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
1098    rPC += 2;
1099    return retval;
1100}
1101//! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit
1102
1103//!
1104int op_rem_int_lit8() {
1105    u2 vA = INST_AA(inst);
1106    u2 vB = (u2)FETCH(1) & 0xff;
1107    s2 tmp = (s2)FETCH(1) >> 8;
1108    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
1109    rPC += 2;
1110    return retval;
1111}
//! common code to handle long ALU ops
1113
1114//! It uses XMM
//! Emits vA(64) = v1(64) <opc> v2(64) entirely in XMM temporaries.
//! Valid for add/sub/and/or/xor; div and rem need the libgcc helpers.
int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    get_virtual_reg(v1, OpndSize_64, 1, false); //tmp1 = v1
    get_virtual_reg(v2, OpndSize_64, 2, false); //tmp2 = v2
    alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false); //tmp1 op= tmp2
    set_virtual_reg(vA, OpndSize_64, 1, false); //vA = tmp1
    return 0;
}
1122//! lower bytecode ADD_LONG by calling common_alu_long
1123
1124//!
1125int op_add_long() {
1126    u2 vA = INST_AA(inst);
1127    u2 v1 = *((u1*)rPC + 2);
1128    u2 v2 = *((u1*)rPC + 3);
1129    int retval = common_alu_long(add_opc, vA, v1, v2);
1130    rPC += 2;
1131    return retval;
1132}
1133//! lower bytecode SUB_LONG by calling common_alu_long
1134
1135//!
1136int op_sub_long() {
1137    u2 vA = INST_AA(inst);
1138    u2 v1 = *((u1*)rPC + 2);
1139    u2 v2 = *((u1*)rPC + 3);
1140    int retval = common_alu_long(sub_opc, vA, v1, v2);
1141    rPC += 2;
1142    return retval;
1143}
1144//! lower bytecode AND_LONG by calling common_alu_long
1145
1146//!
1147int op_and_long() {
1148    u2 vA = INST_AA(inst);
1149    u2 v1 = *((u1*)rPC + 2);
1150    u2 v2 = *((u1*)rPC + 3);
1151    int retval = common_alu_long(and_opc, vA, v1, v2);
1152    rPC += 2;
1153    return retval;
1154}
1155//! lower bytecode OR_LONG by calling common_alu_long
1156
1157//!
1158int op_or_long() {
1159    u2 vA = INST_AA(inst);
1160    u2 v1 = *((u1*)rPC + 2);
1161    u2 v2 = *((u1*)rPC + 3);
1162    int retval = common_alu_long(or_opc, vA, v1, v2);
1163    rPC += 2;
1164    return retval;
1165}
1166//! lower bytecode XOR_LONG by calling common_alu_long
1167
1168//!
1169int op_xor_long() {
1170    u2 vA = INST_AA(inst);
1171    u2 v1 = *((u1*)rPC + 2);
1172    u2 v2 = *((u1*)rPC + 3);
1173    int retval = common_alu_long(xor_opc, vA, v1, v2);
1174    rPC += 2;
1175    return retval;
1176}
1177//! lower bytecode ADD_LONG_2ADDR by calling common_alu_long
1178
1179//!
1180int op_add_long_2addr() {
1181    u2 vA = INST_A(inst);
1182    u2 v1 = vA;
1183    u2 v2 = INST_B(inst);
1184    int retval = common_alu_long(add_opc, vA, v1, v2);
1185    rPC += 1;
1186    return retval;
1187}
1188//! lower bytecode SUB_LONG_2ADDR by calling common_alu_long
1189
1190//!
1191int op_sub_long_2addr() {
1192    u2 vA = INST_A(inst);
1193    u2 v1 = vA;
1194    u2 v2 = INST_B(inst);
1195    int retval = common_alu_long(sub_opc, vA, v1, v2);
1196    rPC += 1;
1197    return retval;
1198}
1199//! lower bytecode AND_LONG_2ADDR by calling common_alu_long
1200
1201//!
1202int op_and_long_2addr() {
1203    u2 vA = INST_A(inst);
1204    u2 v1 = vA;
1205    u2 v2 = INST_B(inst);
1206    int retval = common_alu_long(and_opc, vA, v1, v2);
1207    rPC += 1;
1208    return retval;
1209}
1210//! lower bytecode OR_LONG_2ADDR by calling common_alu_long
1211
1212//!
1213int op_or_long_2addr() {
1214    u2 vA = INST_A(inst);
1215    u2 v1 = vA;
1216    u2 v2 = INST_B(inst);
1217    int retval = common_alu_long(or_opc, vA, v1, v2);
1218    rPC += 1;
1219    return retval;
1220}
1221//! lower bytecode XOR_LONG_2ADDR by calling common_alu_long
1222
1223//!
1224int op_xor_long_2addr() {
1225    u2 vA = INST_A(inst);
1226    u2 v1 = vA;
1227    u2 v2 = INST_B(inst);
1228    int retval = common_alu_long(xor_opc, vA, v1, v2);
1229    rPC += 1;
1230    return retval;
1231}
1232
1233//signed vs unsigned imul and mul?
1234#define P_GPR_1 PhysicalReg_EBX
1235#define P_GPR_2 PhysicalReg_ECX
1236#define P_GPR_3 PhysicalReg_ESI
1237//! common code to handle multiplication of long
1238
1239//! It uses GPR
//! Emits a 64-bit multiply from 32-bit partial products:
//!   result = ((v1.lo*v2.hi + v1.hi*v2.lo) << 32) + v1.lo*v2.lo
//! The widening unsigned mul leaves its low word in eax and high word in edx.
int common_mul_long(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);                    //tmp1 = v2.lo
    move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true); //eax = v2.lo
    //imul: 2L * 1H update temporary 1
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false);    //tmp1 = v2.lo * v1.hi
    get_virtual_reg(v1, OpndSize_32, 3, false);                    //tmp3 = v1.lo
    move_reg_to_reg(OpndSize_32, 3, false, 2, false);              //tmp2 = v1.lo
    //imul: 1L * 2H
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false);    //tmp2 = v1.lo * v2.hi
    alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);  //tmp1 = sum of cross products
    alu_unary_reg(OpndSize_32, mul_opc, 3, false);                 //edx:eax = eax * tmp3 (v2.lo * v1.lo)
    alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false); //tmp1 += carry-out of low product
    set_virtual_reg(vA+1, OpndSize_32, 1, false);                  //vA.hi = tmp1
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);       //vA.lo = eax
    return 0;
}
1256#undef P_GPR_1
1257#undef P_GPR_2
1258#undef P_GPR_3
1259//! lower bytecode MUL_LONG by calling common_mul_long
1260
1261//!
1262int op_mul_long() {
1263    u2 vA = INST_AA(inst);
1264    u2 v1 = *((u1*)rPC + 2);
1265    u2 v2 = *((u1*)rPC + 3);
1266    int retval = common_mul_long(vA, v1, v2);
1267    rPC += 2;
1268    return retval;
1269}
1270//! lower bytecode MUL_LONG_2ADDR by calling common_mul_long
1271
1272//!
1273int op_mul_long_2addr() {
1274    u2 vA = INST_A(inst);
1275    u2 v1 = vA;
1276    u2 v2 = INST_B(inst);
1277    int retval = common_mul_long(vA, v1, v2);
1278    rPC += 1;
1279    return retval;
1280}
1281
1282#define P_GPR_1 PhysicalReg_EBX
1283#define P_GPR_2 PhysicalReg_ECX
1284//! common code to handle DIV & REM of long
1285
1286//! It uses GPR & XMM; and calls call_moddi3 & call_divdi3
//! Emits 64-bit DIV/REM: zero-checks the divisor (branching to the
//! divide-by-zero thrower), then calls __divdi3/__moddi3 with both operands
//! passed on the native stack; the 64-bit result returns in edx:eax.
int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);   //tmp1 = divisor low word
    get_virtual_reg(v2+1, OpndSize_32, 2, false); //tmp2 = divisor high word
    //save to native stack before changing register P_GPR_1
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true);
    alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false); //ZF set iff divisor == 0

    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true); //divisor at [esp+8..15]
    get_virtual_reg(v1, OpndSize_64, 1, false);
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);  //dividend at [esp+0..7]
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs
    if(isRem)
        call_moddi3();
    else
        call_divdi3();
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); //pop the argument area
    set_virtual_reg(vA+1, OpndSize_32,PhysicalReg_EDX, true); //result high word
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);  //result low word
    return 0;
}
1312#undef P_GPR_1
1313#undef P_GPR_2
1314//! lower bytecode DIV_LONG by calling common_div_rem_long
1315
1316//!
1317int op_div_long() {
1318    u2 vA = INST_AA(inst);
1319    u2 v1 = *((u1*)rPC + 2);
1320    u2 v2 = *((u1*)rPC + 3);
1321    int retval = common_div_rem_long(false, vA, v1, v2);
1322    rPC += 2;
1323    return retval;
1324}
1325//! lower bytecode REM_LONG by calling common_div_rem_long
1326
1327//!
1328int op_rem_long() {
1329    u2 vA = INST_AA(inst);
1330    u2 v1 = *((u1*)rPC + 2);
1331    u2 v2 = *((u1*)rPC + 3);
1332    int retval = common_div_rem_long(true, vA, v1, v2);
1333    rPC += 2;
1334    return retval;
1335}
1336//! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long
1337
1338//!
1339int op_div_long_2addr() {
1340    u2 vA = INST_A(inst);
1341    u2 v1 = vA;
1342    u2 v2 = INST_B(inst);
1343    int retval = common_div_rem_long(false, vA, v1, v2);
1344    rPC += 1;
1345    return retval;
1346}
1347//! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long
1348
1349//!
1350int op_rem_long_2addr() { //call __moddi3 instead of __divdi3
1351    u2 vA = INST_A(inst);
1352    u2 v1 = vA;
1353    u2 v2 = INST_B(inst);
1354    int retval = common_div_rem_long(true, vA, v1, v2);
1355    rPC += 1;
1356    return retval;
1357}
1358
1359//! common code to handle SHL long
1360
1361//! It uses XMM
//! Emits vA(64) = v1(64) << (v2 masked by the "shiftMask" global constant),
//! using XMM shifts (psllq takes the full 64-bit count register).
//! NOTE(review): shiftMask is defined elsewhere; presumably 0x3f to clamp the
//! count to 0..63 per Dalvik shl-long semantics — confirm at its definition.
int common_shl_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false); //tmp2 = 32-bit shift count from v2

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); //mask the count
    alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false); //tmp1 <<= count
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
1373
1374//! common code to handle SHR long
1375
1376//! It uses XMM
//! Emits arithmetic 64-bit right shift: vA = v1 >> (v2 & shiftMask).
//! XMM only has a logical 64-bit shift (psrlq), so after shifting, negative
//! inputs (v1 high word < 0) get their vacated upper bits filled with ones by
//! OR-ing in a mask built from the "value64"/"64bits" global constants.
int common_shr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false); //tmp2 = 32-bit shift count

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); //mask the count
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); //logical shift
    compare_imm_VR(OpndSize_32, 0, (v1+1)); //sign of the original value
    conditional_jump(Condition_GE, ".common_shr_long_special", true); //non-negative: no fixup
    rememberState(1);

    //negative input: build the sign-fill mask = 64bits << (64 - count)
    load_global_data_API("value64", OpndSize_64, 4, false);

    alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false); //tmp4 = 64 - count

    load_global_data_API("64bits", OpndSize_64, 5, false);

    alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false);
    alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false); //fill high bits with sign
    rememberState(2);
    //check whether the target is next instruction TODO
    unconditional_jump(".common_shr_long_done", true);

    insertLabel(".common_shr_long_special", true);
    goToState(1);
    transferToState(2);
    insertLabel(".common_shr_long_done", true);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
1408
1409//! common code to handle USHR long
1410
1411//! It uses XMM
//! Emits unsigned (logical) 64-bit right shift: vA = v1 >>> (v2 & shiftMask).
//! psrlq already zero-fills, so unlike common_shr_long no sign fixup is needed.
int common_ushr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_sd(v1, 1, false); //tmp1 = 64-bit value
    get_VR_ss(v2, 2, false); //tmp2 = 32-bit shift count

    load_sd_global_data_API("shiftMask", 3, false);

    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); //mask the count
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); //logical shift
    set_VR_sd(vA, 1, false);
    return 0;
}
1423//! lower bytecode SHL_LONG by calling common_shl_long
1424
1425//!
1426int op_shl_long() {
1427    u2 vA = INST_AA(inst);
1428    u2 v1 = *((u1*)rPC + 2);
1429    u2 v2 = *((u1*)rPC + 3);
1430    int retval = common_shl_long(vA, v1, v2);
1431    rPC += 2;
1432    return retval;
1433}
1434//! lower bytecode SHL_LONG_2ADDR by calling common_shl_long
1435
1436//!
1437int op_shl_long_2addr() {
1438    u2 vA = INST_A(inst);
1439    u2 v1 = vA;
1440    u2 v2 = INST_B(inst);
1441    int retval = common_shl_long(vA, v1, v2);
1442    rPC += 1;
1443    return retval;
1444}
1445//! lower bytecode SHR_LONG by calling common_shr_long
1446
1447//!
1448int op_shr_long() {
1449    u2 vA = INST_AA(inst);
1450    u2 v1 = *((u1*)rPC + 2);
1451    u2 v2 = *((u1*)rPC + 3);
1452    int retval = common_shr_long(vA, v1, v2);
1453    rPC += 2;
1454    return retval;
1455}
1456//! lower bytecode SHR_LONG_2ADDR by calling common_shr_long
1457
1458//!
1459int op_shr_long_2addr() {
1460    u2 vA = INST_A(inst);
1461    u2 v1 = vA;
1462    u2 v2 = INST_B(inst);
1463    int retval = common_shr_long(vA, v1, v2);
1464    rPC += 1;
1465    return retval;
1466}
1467//! lower bytecode USHR_LONG by calling common_ushr_long
1468
1469//!
1470int op_ushr_long() {
1471    u2 vA = INST_AA(inst);
1472    u2 v1 = *((u1*)rPC + 2);
1473    u2 v2 = *((u1*)rPC + 3);
1474    int retval = common_ushr_long(vA, v1, v2);
1475    rPC += 2;
1476    return retval;
1477}
1478//! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long
1479
1480//!
1481int op_ushr_long_2addr() {
1482    u2 vA = INST_A(inst);
1483    u2 v1 = vA;
1484    u2 v2 = INST_B(inst);
1485    int retval = common_ushr_long(vA, v1, v2);
1486    rPC += 1;
1487    return retval;
1488}
1489#define USE_MEM_OPERAND
1490///////////////////////////////////////////
1491//! common code to handle ALU of floats
1492
1493//! It uses XMM
//! Emits vA(float) = v1 <opc> v2 using scalar-single SSE.
//! With USE_MEM_OPERAND defined (it is, above), the second operand is taken
//! straight from the VR's memory slot, saving a load into an XMM temp.
int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_ss(v1, 1, false); //tmp1 = v1
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/); //tmp1 op= [v2]
#else
    get_VR_ss(v2, 2, false);
    alu_ss_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_ss(vA, 1, false);
    return 0;
}
1505//! lower bytecode ADD_FLOAT by calling common_alu_float
1506
1507//!
1508int op_add_float() {
1509    u2 vA = INST_AA(inst);
1510    u2 v1 = *((u1*)rPC + 2);
1511    u2 v2 = *((u1*)rPC + 3);
1512    int retval = common_alu_float(add_opc, vA, v1, v2);
1513    rPC += 2;
1514    return retval;
1515}
1516//! lower bytecode SUB_FLOAT by calling common_alu_float
1517
1518//!
1519int op_sub_float() {
1520    u2 vA = INST_AA(inst);
1521    u2 v1 = *((u1*)rPC + 2);
1522    u2 v2 = *((u1*)rPC + 3);
1523    int retval = common_alu_float(sub_opc, vA, v1, v2);
1524    rPC += 2;
1525    return retval;
1526}
1527//! lower bytecode MUL_FLOAT by calling common_alu_float
1528
1529//!
1530int op_mul_float() {
1531    u2 vA = INST_AA(inst);
1532    u2 v1 = *((u1*)rPC + 2);
1533    u2 v2 = *((u1*)rPC + 3);
1534    int retval = common_alu_float(mul_opc, vA, v1, v2);
1535    rPC += 2;
1536    return retval;
1537}
1538//! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float
1539
1540//!
1541int op_add_float_2addr() {
1542    u2 vA = INST_A(inst);
1543    u2 v1 = vA;
1544    u2 v2 = INST_B(inst);
1545    int retval = common_alu_float(add_opc, vA, v1, v2);
1546    rPC += 1;
1547    return retval;
1548}
1549//! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float
1550
1551//!
1552int op_sub_float_2addr() {
1553    u2 vA = INST_A(inst);
1554    u2 v1 = vA;
1555    u2 v2 = INST_B(inst);
1556    int retval = common_alu_float(sub_opc, vA, v1, v2);
1557    rPC += 1;
1558    return retval;
1559}
1560//! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float
1561
1562//!
1563int op_mul_float_2addr() {
1564    u2 vA = INST_A(inst);
1565    u2 v1 = vA;
1566    u2 v2 = INST_B(inst);
1567    int retval = common_alu_float(mul_opc, vA, v1, v2);
1568    rPC += 1;
1569    return retval;
1570}
1571//! common code to handle DIV of float
1572
1573//! It uses FP stack
//! Emits float divide on the x87 FP stack: vA = v1 / v2.
//! NOTE(review): current DIV_FLOAT lowering goes through common_alu_float
//! (SSE divss) instead; this x87 variant is kept as an alternative.
int common_div_float(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_32, v1); //flds
    fpu_VR(div_opc, OpndSize_32, v2);  //fdivs: st0 /= [v2]
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
//! lower bytecode DIV_FLOAT by calling common_alu_float
1581
1582//!
1583int op_div_float() {
1584    u2 vA = INST_AA(inst);
1585    u2 v1 = *((u1*)rPC + 2);
1586    u2 v2 = *((u1*)rPC + 3);
1587    int retval = common_alu_float(div_opc, vA, v1, v2);
1588    rPC += 2;
1589    return retval;
1590}
//! lower bytecode DIV_FLOAT_2ADDR by calling common_alu_float
1592
1593//!
1594int op_div_float_2addr() {
1595    u2 vA = INST_A(inst);
1596    u2 v1 = vA;
1597    u2 v2 = INST_B(inst);
1598    int retval = common_alu_float(div_opc, vA, v1, v2);
1599    rPC += 1;
1600    return retval;
1601}
//! common code to handle ALU ops of double
1603
1604//! It uses XMM
//! Emits vA(double) = v1 <opc> v2 using scalar-double SSE.
//! With USE_MEM_OPERAND defined (it is, above), the second operand is taken
//! straight from the VR's memory slot, saving a load into an XMM temp.
int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_sd(v1, 1, false); //tmp1 = v1
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/); //tmp1 op= [v2]
#else
    get_VR_sd(v2, 2, false);
    alu_sd_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_sd(vA, 1, false);
    return 0;
}
1616//! lower bytecode ADD_DOUBLE by calling common_alu_double
1617
1618//!
1619int op_add_double() {
1620    u2 vA = INST_AA(inst);
1621    u2 v1 = *((u1*)rPC + 2);
1622    u2 v2 = *((u1*)rPC + 3);
1623    int retval = common_alu_double(add_opc, vA, v1, v2);
1624    rPC += 2;
1625    return retval;
1626}
1627//! lower bytecode SUB_DOUBLE by calling common_alu_double
1628
1629//!
1630int op_sub_double() {
1631    u2 vA = INST_AA(inst);
1632    u2 v1 = *((u1*)rPC + 2);
1633    u2 v2 = *((u1*)rPC + 3);
1634    int retval = common_alu_double(sub_opc, vA, v1, v2);
1635    rPC += 2;
1636    return retval;
1637}
1638//! lower bytecode MUL_DOUBLE by calling common_alu_double
1639
1640//!
1641int op_mul_double() {
1642    u2 vA = INST_AA(inst);
1643    u2 v1 = *((u1*)rPC + 2);
1644    u2 v2 = *((u1*)rPC + 3);
1645    int retval = common_alu_double(mul_opc, vA, v1, v2);
1646    rPC += 2;
1647    return retval;
1648}
1649//! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double
1650
1651//!
1652int op_add_double_2addr() {
1653    u2 vA = INST_A(inst);
1654    u2 v1 = vA;
1655    u2 v2 = INST_B(inst);
1656    int retval = common_alu_double(add_opc, vA, v1, v2);
1657    rPC += 1;
1658    return retval;
1659}
1660//! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double
1661
1662//!
1663int op_sub_double_2addr() {
1664    u2 vA = INST_A(inst);
1665    u2 v1 = vA;
1666    u2 v2 = INST_B(inst);
1667    int retval = common_alu_double(sub_opc, vA, v1, v2);
1668    rPC += 1;
1669    return retval;
1670}
1671//! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double
1672
1673//!
1674int op_mul_double_2addr() {
1675    u2 vA = INST_A(inst);
1676    u2 v1 = vA;
1677    u2 v2 = INST_B(inst);
1678    int retval = common_alu_double(mul_opc, vA, v1, v2);
1679    rPC += 1;
1680    return retval;
1681}
1682//! common code to handle DIV of double
1683
1684//! It uses FP stack
//! Emits double divide on the x87 FP stack: vA = v1 / v2.
//! NOTE(review): current DIV_DOUBLE lowering goes through common_alu_double
//! (SSE divsd) instead; this x87 variant is kept as an alternative.
int common_div_double(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_64, v1); //fldl
    fpu_VR(div_opc, OpndSize_64, v2); //fdivl
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
//! lower bytecode DIV_DOUBLE by calling common_alu_double
1692
1693//!
1694int op_div_double() {
1695    u2 vA = INST_AA(inst);
1696    u2 v1 = *((u1*)rPC + 2);
1697    u2 v2 = *((u1*)rPC + 3);
1698    int retval = common_alu_double(div_opc, vA, v1, v2);
1699    rPC += 2;
1700    return retval;
1701}
//! lower bytecode DIV_DOUBLE_2ADDR by calling common_alu_double
1703
1704//!
1705int op_div_double_2addr() {
1706    u2 vA = INST_A(inst);
1707    u2 v1 = vA;
1708    u2 v2 = INST_B(inst);
1709    int retval = common_alu_double(div_opc, vA, v1, v2);
1710    rPC += 1;
1711    return retval;
1712}
1713#define P_GPR_1 PhysicalReg_EBX
1714#define P_GPR_2 PhysicalReg_ECX
1715//! common code to handle REM of float
1716
1717//! It uses GPR & calls call_fmodf
//! Emits float remainder via a native call to fmodf(v1, v2).
//! Both floats are pushed onto the native stack as call arguments; the
//! result comes back on the x87 stack and is stored with fstps.
int common_rem_float(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_32, 1, false);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); //arg area
    move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true); //x at [esp]
    move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true); //y at [esp+4]
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmodf(); //(float x, float y) return float
    load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); //pop args
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
1730#undef P_GPR_1
1731#undef P_GPR_2
1732//! lower bytecode REM_FLOAT by calling common_rem_float
1733
1734//!
1735int op_rem_float() {
1736    u2 vA = INST_AA(inst);
1737    u2 v1 = *((u1*)rPC + 2);
1738    u2 v2 = *((u1*)rPC + 3);
1739    int retval = common_rem_float(vA, v1, v2);
1740    rPC += 2;
1741    return retval;
1742}
1743//! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float
1744
1745//!
1746int op_rem_float_2addr() {
1747    u2 vA = INST_A(inst);
1748    u2 v1 = vA;
1749    u2 v2 = INST_B(inst);
1750    int retval = common_rem_float(vA, v1, v2);
1751    rPC += 1;
1752    return retval;
1753}
1754//! common code to handle REM of double
1755
1756//! It uses XMM & calls call_fmod
//! Emits double remainder via a native call to fmod(v1, v2).
//! Both doubles are pushed onto the native stack as call arguments; the
//! result comes back on the x87 stack and is stored with fstpl.
int common_rem_double(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_64, 1, false);
    get_virtual_reg(v2, OpndSize_64, 2, false);
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); //arg area
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true); //x at [esp]
    move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true); //y at [esp+8]
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmod(); //(long double x, long double y) return double
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); //pop args
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
1769//! lower bytecode REM_DOUBLE by calling common_rem_double
1770
1771//!
1772int op_rem_double() {
1773    u2 vA = INST_AA(inst);
1774    u2 v1 = *((u1*)rPC + 2);
1775    u2 v2 = *((u1*)rPC + 3);
1776    int retval = common_rem_double(vA, v1, v2);
1777    rPC += 2;
1778    return retval;
1779}
1780//! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double
1781
1782//!
1783int op_rem_double_2addr() {
1784    u2 vA = INST_A(inst);
1785    u2 v1 = vA;
1786    u2 v2 = INST_B(inst);
1787    int retval = common_rem_double(vA, v1, v2);
1788    rPC += 1;
1789    return retval;
1790}
1791//! lower bytecode CMPL_FLOAT
1792
1793//!
//! Emits CMPL_FLOAT: vA = (v1 > v2) ? 1 : (v1 == v2) ? 0 : -1, NaN -> -1.
//! ucomiss on unordered input sets ZF=PF=CF all to 1, so the cmov for P must
//! come AFTER the one for Z to override the bogus "equal" result.
int op_cmpl_float() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_ss(v1, 1, false); //xmm
    move_imm_to_reg(OpndSize_32, 0, 1, false);          //GPR tmp1: "equal" result
    move_imm_to_reg(OpndSize_32, 1, 2, false);          //tmp2: "greater" result
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); //tmp3: "NaN" result (-1 for cmpl)
    compare_VR_ss_reg(v2, 1, false); //flags = v1 ? v2
    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32,
                                 0xffffffff, 4, false); //tmp4 default -1 covers "less"
    //ORDER of cmov matters !!! (Z,P,A)
    //finalNaN: unordered 0xffffffff
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false); //equal -> 0
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false); //unordered -> -1 (overrides Z)
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false); //greater -> 1
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
1818//! lower bytecode CMPG_FLOAT
1819
1820//!
//! Emits CMPG_FLOAT: like CMPL_FLOAT but NaN -> +1.
//! The P cmov moves the "greater" value (1), after Z, so unordered results
//! (which set ZF=PF=CF) end up as +1 rather than 0.
int op_cmpg_float() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_ss(v1, 1, false);
    compare_VR_ss_reg(v2, 1, false); //flags = v1 ? v2
    move_imm_to_reg(OpndSize_32, 0, 1, false); //tmp1: "equal" result
    move_imm_to_reg(OpndSize_32, 1, 2, false); //tmp2: "greater"/"NaN" result
    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); //tmp3 default -1 covers "less"
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                1, false, 3, false); //equal -> 0
    //finalNaN: unordered
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                2, false, 3, false); //unordered -> 1 (overrides Z)
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                2, false, 3, false); //greater -> 1
    set_virtual_reg(vA, OpndSize_32, 3, false);
    rPC += 2;
    return 0;
}
1842//! lower bytecode CMPL_DOUBLE
1843
1844//!
//! Emits CMPL_DOUBLE: vA = (v1 > v2) ? 1 : (v1 == v2) ? 0 : -1, NaN -> -1.
//! Same cmov trick as op_cmpl_float: unordered ucomisd sets ZF=PF=CF, so the
//! P move must follow the Z move to override the bogus "equal".
int op_cmpl_double() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_sd(v1, 1, false);
    compare_VR_sd_reg(v2, 1, false); //flags = v1 ? v2
    move_imm_to_reg(OpndSize_32, 0, 1, false);          //tmp1: "equal" result
    move_imm_to_reg(OpndSize_32, 1, 2, false);          //tmp2: "greater" result
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); //tmp3: "NaN" result (-1 for cmpl)

    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false); //tmp4 default -1 covers "less"
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false); //equal -> 0
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false); //unordered -> -1 (overrides Z)
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false); //greater -> 1
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
1867//! lower bytecode CMPG_DOUBLE
1868
1869//!
//! Emits CMPG_DOUBLE: like CMPL_DOUBLE but NaN -> +1.
//! The P cmov moves the "greater" value (1) after the Z move, so unordered
//! comparisons (ZF=PF=CF all set) yield +1.
int op_cmpg_double() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_sd(v1, 1, false);
    compare_VR_sd_reg(v2, 1, false); //flags = v1 ? v2
    move_imm_to_reg(OpndSize_32, 0, 1, false); //tmp1: "equal" result
    move_imm_to_reg(OpndSize_32, 1, 2, false); //tmp2: "greater"/"NaN" result

    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32,
                                 0xffffffff, 3, false); //tmp3 default -1 covers "less"
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 3, false); //equal -> 0
    //finalNaN: unordered
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             2, false, 3, false); //unordered -> 1 (overrides Z)
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 3, false); //greater -> 1
   set_virtual_reg(vA, OpndSize_32, 3, false);
    rPC += 2;
    return 0;
}
1893#define P_GPR_1 PhysicalReg_EBX
1894#define P_GPR_2 PhysicalReg_ECX
1895#define P_GPR_3 PhysicalReg_ESI
1896#define P_SCRATCH_1 PhysicalReg_EDX
1897#define P_SCRATCH_2 PhysicalReg_EAX
1898#define OPTION_OLD //for simpler cfg
1899//! lower bytecode CMP_LONG
1900
1901//!
//! Emits CMP_LONG: vA = sign of (v1 - v2) as -1/0/1.
//! Compares the high words first (signed), then the low words (unsigned).
//! OPTION_OLD (defined above) uses cmovs into temp 6 from the constant temps
//! 3 (-1), 4 (1), 5 (0) to keep the CFG simple; the #ifndef path branches to
//! per-result labels instead.
int op_cmp_long() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_virtual_reg(v1+1, OpndSize_32, 2, false); //tmp2 = v1.hi
#ifdef OPTION_OLD
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); //tmp3 = -1 ("less")
    move_imm_to_reg(OpndSize_32, 1, 4, false);          //tmp4 = 1 ("greater")
    move_imm_to_reg(OpndSize_32, 0, 5, false);          //tmp5 = 0 ("equal")
#endif
    compare_VR_reg(OpndSize_32,
                                v2+1, 2, false); //signed compare of high words
#ifndef OPTION_OLD
    conditional_jump(Condition_L, ".cmp_long_less", true);
    conditional_jump(Condition_G, ".cmp_long_greater", true);
#else
    conditional_jump(Condition_E, ".cmp_long_equal", true); //high words equal: decide on low words
    rememberState(1);
    //high words differ: L or G always fires, so tmp6 is fully defined here
    conditional_move_reg_to_reg(OpndSize_32, Condition_L, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_G, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    rememberState(2);
    unconditional_jump(".cmp_long_okay", true);
    insertLabel(".cmp_long_equal", true);
    goToState(1);
#endif

    get_virtual_reg(v1, OpndSize_32, 1, false); //tmp1 = v1.lo
    compare_VR_reg(OpndSize_32,
                                v2, 1, false); //unsigned compare of low words
#ifdef OPTION_OLD
    conditional_move_reg_to_reg(OpndSize_32, Condition_E,
                                             5, false, 6, false); //equal -> 0
    conditional_move_reg_to_reg(OpndSize_32, Condition_B, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    transferToState(2);
#else
    conditional_jump(Condition_A, ".cmp_long_greater", true);
    conditional_jump(Condition_NE, ".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0xffffffff);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_greater", true);
    set_VR_to_imm(vA, OpndSize_32, 1);
#endif
    insertLabel(".cmp_long_okay", true); //common merge point
    rPC += 2;
    return 0;
}
1960#undef P_GPR_1
1961#undef P_GPR_2
1962#undef P_GPR_3
1963