1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18/*! \file LowerAlu.cpp
19    \brief This file lowers ALU bytecodes.
20*/
21#include "libdex/DexOpcodes.h"
22#include "libdex/DexFile.h"
23#include "Lower.h"
24#include "NcgAot.h"
25#include "enc_wrapper.h"
26
27/////////////////////////////////////////////
28#define P_GPR_1 PhysicalReg_EBX
29//! lower bytecode NEG_INT
30
31//!
32int op_neg_int() {
33    u2 vA = INST_A(inst); //destination
34    u2 vB = INST_B(inst);
35    get_virtual_reg(vB, OpndSize_32, 1, false);
36    alu_unary_reg(OpndSize_32, neg_opc, 1, false);
37    set_virtual_reg(vA, OpndSize_32, 1, false);
38    rPC += 1;
39    return 0;
40}
41//! lower bytecode NOT_INT
42
43//!
44int op_not_int() {
45    u2 vA = INST_A(inst); //destination
46    u2 vB = INST_B(inst);
47    get_virtual_reg(vB, OpndSize_32, 1, false);
48    alu_unary_reg(OpndSize_32, not_opc, 1, false);
49    set_virtual_reg(vA, OpndSize_32, 1, false);
50    rPC += 1;
51    return 0;
52}
53#undef P_GPR_1
54//! lower bytecode NEG_LONG
55
56//! This implementation uses XMM registers
57int op_neg_long() {
58    u2 vA = INST_A(inst); //destination
59    u2 vB = INST_B(inst);
60    get_virtual_reg(vB, OpndSize_64, 1, false);
61    alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false);
62    alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false);
63    set_virtual_reg(vA, OpndSize_64, 2, false);
64    rPC += 1;
65    return 0;
66}
67//! lower bytecode NOT_LONG
68
69//! This implementation uses XMM registers
70int op_not_long() {
71    u2 vA = INST_A(inst); //destination
72    u2 vB = INST_B(inst);
73    get_virtual_reg(vB, OpndSize_64, 1, false);
74    load_global_data_API("64bits", OpndSize_64, 2, false);
75    alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false);
76    set_virtual_reg(vA, OpndSize_64, 1, false);
77    rPC += 1;
78    return 0;
79}
80#define P_GPR_1 PhysicalReg_EBX
81//! lower bytecode NEG_FLOAT
82
83//! This implementation uses GPR
84int op_neg_float() {
85    u2 vA = INST_A(inst); //destination
86    u2 vB = INST_B(inst);
87    get_virtual_reg(vB, OpndSize_32, 1, false);
88    alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false);
89    set_virtual_reg(vA, OpndSize_32, 1, false);
90    rPC += 1;
91    return 0;
92}
93#undef P_GPR_1
94
95//! lower bytecode NEG_DOUBLE
96
97//! This implementation uses XMM registers
98int op_neg_double() {
99    u2 vA = INST_A(inst); //destination
100    u2 vB = INST_B(inst);
101    get_virtual_reg(vB, OpndSize_64, 1, false);
102    load_global_data_API("doubNeg", OpndSize_64, 2, false);
103    alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false);
104    set_virtual_reg(vA, OpndSize_64, 2, false);
105    rPC += 1;
106    return 0;
107}
108
109//! lower bytecode INT_TO_LONG
110
111//! It uses native instruction cdq
112int op_int_to_long() {
113    u2 vA = INST_A(inst); //destination
114    u2 vB = INST_B(inst);
115    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
116    convert_integer(OpndSize_32, OpndSize_64);
117    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
118    set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true);
119    rPC += 1;
120    return 0;
121}
122//! lower bytecode INT_TO_FLOAT
123
124//! This implementation uses FP stack
125int op_int_to_float() {
126    u2 vA = INST_A(inst); //destination
127    u2 vB = INST_B(inst);
128    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
129    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
130    rPC += 1;
131    return 0;
132}
133//! lower bytecode INT_TO_DOUBLE
134
135//! This implementation uses FP stack
136int op_int_to_double() {
137    u2 vA = INST_A(inst); //destination
138    u2 vB = INST_B(inst);
139    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
140    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
141    rPC += 1;
142    return 0;
143}
144//! lower bytecode LONG_TO_FLOAT
145
146//! This implementation uses FP stack
147int op_long_to_float() {
148    u2 vA = INST_A(inst); //destination
149    u2 vB = INST_B(inst);
150    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
151    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
152    rPC += 1;
153    return 0;
154}
155//! lower bytecode LONG_TO_DOUBLE
156
157//! This implementation uses FP stack
158int op_long_to_double() {
159    u2 vA = INST_A(inst); //destination
160    u2 vB = INST_B(inst);
161    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
162    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
163    rPC += 1;
164    return 0;
165}
166//! lower bytecode FLOAT_TO_DOUBLE
167
168//! This implementation uses FP stack
169int op_float_to_double() {
170    u2 vA = INST_A(inst); //destination
171    u2 vB = INST_B(inst);
172    load_fp_stack_VR(OpndSize_32, vB); //flds
173    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
174    rPC += 1;
175    return 0;
176}
177//! lower bytecode DOUBLE_TO_FLOAT
178
179//! This implementation uses FP stack
180int op_double_to_float() {
181    u2 vA = INST_A(inst); //destination
182    u2 vB = INST_B(inst);
183    load_fp_stack_VR(OpndSize_64, vB); //fldl
184    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
185    rPC += 1;
186    return 0;
187}
188#define P_GPR_1 PhysicalReg_EBX
189//! lower bytecode LONG_TO_INT
190
191//! This implementation uses GPR
192int op_long_to_int() {
193    u2 vA = INST_A(inst); //destination
194    u2 vB = INST_B(inst);
195    get_virtual_reg(vB, OpndSize_32, 1, false);
196    set_virtual_reg(vA, OpndSize_32, 1, false);
197    rPC += 1;
198    return 0;
199}
200#undef P_GPR_1
201
202//! common code to convert a float or double to integer
203
204//! It uses FP stack
int common_fp_to_int(bool isDouble, u2 vA, u2 vB) {
    //Convert the float/double in vB to a 32-bit int in vA with Java semantics:
    //NaN -> 0, +Inf/too large -> 0x7fffffff, -Inf/too small -> 0x80000000,
    //all other values truncated toward zero (x87 rounding control forced to 11b).
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    //push the int range limits so the value can be classified
    load_fp_stack_global_data_API("intMax", OpndSize_32);
    load_fp_stack_global_data_API("intMin", OpndSize_32);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2)
    //ST(0) ST(1) --> LintMax value
    conditional_jump(Condition_AE, ".float_to_int_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1)
    //ST(0) --> value
    rememberState(2);
    //unordered compare sets the carry flag: NaN or +Inf path
    conditional_jump(Condition_C, ".float_to_int_nanInf", true);
    //normal case: fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    //set RC bits (11b) in the saved control word: round toward zero
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    //clear RC bits in the saved copy for the later restore
    //NOTE(review): (cw|0xc00)^0xc00 yields RC=00 — assumes the original mode was
    //round-to-nearest; confirm against the VM's FPU initialization
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl
    //restore the control word: fldcw
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_int_okay", true);
    //NaN or +Inf: parity flag distinguishes them (PF set => unordered => NaN)
    insertLabel(".float_to_int_nanInf", true);
    conditional_jump(Condition_NP, ".float_to_int_posInf", true);
    //NaN: result is 0; pop the remaining stack entry first
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_posInf", true);
    //+Inf / above intMax: result is Integer.MAX_VALUE
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x7fffffff);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_negInf", true);
    goToState(1);
    //-Inf / below intMin: two entries (LintMax, value) are still on the FP
    //stack here, so pop twice before writing Integer.MIN_VALUE
    //fstps CHECK
    store_fp_stack_VR(true, OpndSize_32, vA);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(3);
    insertLabel(".float_to_int_okay", true); //merge point for all four paths
    return 0;
}
262//! lower bytecode FLOAT_TO_INT by calling common_fp_to_int
263
264//!
265int op_float_to_int() {
266    u2 vA = INST_A(inst); //destination
267    u2 vB = INST_B(inst);
268    int retval = common_fp_to_int(false, vA, vB);
269    rPC += 1;
270    return retval;
271}
272//! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int
273
274//!
275int op_double_to_int() {
276    u2 vA = INST_A(inst); //destination
277    u2 vB = INST_B(inst);
278    int retval = common_fp_to_int(true, vA, vB);
279    rPC += 1;
280    return retval;
281}
282
283//! common code to convert float or double to long
284
285//! It uses FP stack
int common_fp_to_long(bool isDouble, u2 vA, u2 vB) {
    //Convert the float/double in vB to a 64-bit long in vA with Java semantics:
    //NaN -> valueNanLong, +Inf/too large -> valuePosInfLong (Long.MAX_VALUE),
    //-Inf/too small -> valueNegInfLong (Long.MIN_VALUE), everything else
    //truncated toward zero by forcing the x87 rounding-control bits.
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    //Check if it is the special Negative Infinity value
    load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64);
    //Stack status: ST(0) ST(1) --> LlongMin value
    compare_fp_stack(true, 1, false/*isDouble*/); // Pops ST(1)
    conditional_jump(Condition_AE, ".float_to_long_negInf", true);
    rememberState(1);

    //Check if it is the special Positive Infinity value
    load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64);
    //Stack status: ST(0) ST(1) --> LlongMax value
    compare_fp_stack(true, 1, false/*isDouble*/); // Pops ST(1)
    rememberState(2);
    //carry flag set: NaN (unordered) or positive infinity
    conditional_jump(Condition_C, ".float_to_long_nanInf", true);

    //Normal Case
    //We want to truncate to 0 for conversion. That will be rounding mode 0x11
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    //Change control word to rounding mode 11:
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    //Load the control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    //Reset the control word
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    //Perform the actual conversion
    store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll
    // Restore the original control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    /* NOTE: We do not need to pop out the original value we pushed
     * since load_fpu_cw above already clears the stack for
     * normal values.
     */
    unconditional_jump(".float_to_long_okay", true);

    //We can be here for positive infinity or NaN. Check parity bit
    insertLabel(".float_to_long_nanInf", true);
    conditional_jump(Condition_NP, ".float_to_long_posInf", true);
    goToState(2);
    //Save corresponding Long NaN value
    load_global_data_API("valueNanLong", OpndSize_64, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    transferToState(3);
    //Pop out the original value we pushed
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)
    unconditional_jump(".float_to_long_okay", true);

    insertLabel(".float_to_long_posInf", true);
    goToState(2);
    //Save corresponding Long Positive Infinity value
    load_global_data_API("valuePosInfLong", OpndSize_64, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    transferToState(3);
    //Pop out the original value we pushed
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)
    unconditional_jump(".float_to_long_okay", true);

    insertLabel(".float_to_long_negInf", true);
    //fstpl
    goToState(1);
    //Load corresponding Long Negative Infinity value
    load_global_data_API("valueNegInfLong", OpndSize_64, 3, false);
    set_virtual_reg(vA, OpndSize_64, 3, false);
    transferToState(3);
    //Pop out the original value we pushed
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)

    insertLabel(".float_to_long_okay", true); //merge point for all four paths
    return 0;
}
365//! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long
366
367//!
368int op_float_to_long() {
369    u2 vA = INST_A(inst); //destination
370    u2 vB = INST_B(inst);
371    int retval = common_fp_to_long(false, vA, vB);
372    rPC += 1;
373    return retval;
374}
375//! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long
376
377//!
378int op_double_to_long() {
379    u2 vA = INST_A(inst); //destination
380    u2 vB = INST_B(inst);
381    int retval = common_fp_to_long(true, vA, vB);
382    rPC += 1;
383    return retval;
384}
385#define P_GPR_1 PhysicalReg_EBX
386//! lower bytecode INT_TO_BYTE
387
388//! It uses GPR
389int op_int_to_byte() {
390    u2 vA = INST_A(inst); //destination
391    u2 vB = INST_B(inst);
392    get_virtual_reg(vB, OpndSize_32, 1, false);
393    alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false);
394    alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false);
395    set_virtual_reg(vA, OpndSize_32, 1, false);
396    rPC += 1;
397    return 0;
398}
399//! lower bytecode INT_TO_CHAR
400
401//! It uses GPR
402int op_int_to_char() {
403    u2 vA = INST_A(inst); //destination
404    u2 vB = INST_B(inst);
405    get_virtual_reg(vB, OpndSize_32, 1, false);
406    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
407    alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false);
408    set_virtual_reg(vA, OpndSize_32, 1, false);
409    rPC += 1;
410    return 0;
411}
412//! lower bytecode INT_TO_SHORT
413
414//! It uses GPR
415int op_int_to_short() {
416    u2 vA = INST_A(inst); //destination
417    u2 vB = INST_B(inst);
418    get_virtual_reg(vB, OpndSize_32, 1, false);
419    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
420    alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false);
421    set_virtual_reg(vA, OpndSize_32, 1, false);
422    rPC += 1;
423    return 0;
424}
425//! common code to handle integer ALU ops
426
427//! It uses GPR
int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    //vA := v1 opc v2 for 32-bit VRs, using temp reg 1 as the working register.
    //Not usable for div/rem (those need edx:eax) or VR-count shifts (need %cl).
    get_virtual_reg(v1, OpndSize_32, 1, false);
    //in encoder, reg is first operand, which is the destination
    //gpr_1 op v2(rFP) --> gpr_1
    alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
437#undef P_GPR_1
438#define P_GPR_1 PhysicalReg_EBX
439//! common code to handle integer shift ops
440
441//! It uses GPR
442int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {
443    get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true);
444    get_virtual_reg(v1, OpndSize_32, 1, false);
445    //in encoder, reg2 is first operand, which is the destination
446    //gpr_1 op v2(rFP) --> gpr_1
447    //shift only works with reg cl, v2 should be in %ecx
448    alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false);
449    set_virtual_reg(vA, OpndSize_32, 1, false);
450    return 0;
451}
452#undef p_GPR_1
453//! lower bytecode ADD_INT by calling common_alu_int
454
455//!
456int op_add_int() {
457    u2 vA, v1, v2;
458    vA = INST_AA(inst);
459    v1 = *((u1*)rPC + 2);
460    v2 = *((u1*)rPC + 3);
461    int retval = common_alu_int(add_opc, vA, v1, v2);
462    rPC += 2;
463    return retval;
464}
465//! lower bytecode SUB_INT by calling common_alu_int
466
467//!
468int op_sub_int() {
469    u2 vA, v1, v2;
470    vA = INST_AA(inst);
471    v1 = *((u1*)rPC + 2);
472    v2 = *((u1*)rPC + 3);
473    int retval = common_alu_int(sub_opc, vA, v1, v2);
474    rPC += 2;
475    return retval;
476}
477//! lower bytecode MUL_INT by calling common_alu_int
478
479//!
480int op_mul_int() {
481    u2 vA, v1, v2;
482    vA = INST_AA(inst);
483    v1 = *((u1*)rPC + 2);
484    v2 = *((u1*)rPC + 3);
485    int retval = common_alu_int(imul_opc, vA, v1, v2);
486    rPC += 2;
487    return retval;
488}
489//! lower bytecode AND_INT by calling common_alu_int
490
491//!
492int op_and_int() {
493    u2 vA, v1, v2;
494    vA = INST_AA(inst);
495    v1 = *((u1*)rPC + 2);
496    v2 = *((u1*)rPC + 3);
497    int retval = common_alu_int(and_opc, vA, v1, v2);
498    rPC += 2;
499    return retval;
500}
501//! lower bytecode OR_INT by calling common_alu_int
502
503//!
504int op_or_int() {
505    u2 vA, v1, v2;
506    vA = INST_AA(inst);
507    v1 = *((u1*)rPC + 2);
508    v2 = *((u1*)rPC + 3);
509    int retval = common_alu_int(or_opc, vA, v1, v2);
510    rPC += 2;
511    return retval;
512}
513//! lower bytecode XOR_INT by calling common_alu_int
514
515//!
516int op_xor_int() {
517    u2 vA, v1, v2;
518    vA = INST_AA(inst);
519    v1 = *((u1*)rPC + 2);
520    v2 = *((u1*)rPC + 3);
521    int retval = common_alu_int(xor_opc, vA, v1, v2);
522    rPC += 2;
523    return retval;
524}
525//! lower bytecode SHL_INT by calling common_shift_int
526
527//!
528int op_shl_int() {
529    u2 vA, v1, v2;
530    vA = INST_AA(inst);
531    v1 = *((u1*)rPC + 2);
532    v2 = *((u1*)rPC + 3);
533    int retval = common_shift_int(shl_opc, vA, v1, v2);
534    rPC += 2;
535    return retval;
536}
537//! lower bytecode SHR_INT by calling common_shift_int
538
539//!
540int op_shr_int() {
541    u2 vA, v1, v2;
542    vA = INST_AA(inst);
543    v1 = *((u1*)rPC + 2);
544    v2 = *((u1*)rPC + 3);
545    int retval = common_shift_int(sar_opc, vA, v1, v2);
546    rPC += 2;
547    return retval;
548}
549//! lower bytecode USHR_INT by calling common_shift_int
550
551//!
552int op_ushr_int() {
553    u2 vA, v1, v2;
554    vA = INST_AA(inst);
555    v1 = *((u1*)rPC + 2);
556    v2 = *((u1*)rPC + 3);
557    int retval = common_shift_int(shr_opc, vA, v1, v2);
558    rPC += 2;
559    return retval;
560}
561//! lower bytecode ADD_INT_2ADDR by calling common_alu_int
562
563//!
564int op_add_int_2addr() {
565    u2 vA, v1, v2;
566    vA = INST_A(inst);
567    v1 = vA;
568    v2 = INST_B(inst);
569    int retval = common_alu_int(add_opc, vA, v1, v2);
570    rPC += 1;
571    return retval;
572}
573//! lower bytecode SUB_INT_2ADDR by calling common_alu_int
574
575//!
576int op_sub_int_2addr() {
577    u2 vA, v1, v2;
578    vA = INST_A(inst);
579    v1 = vA;
580    v2 = INST_B(inst);
581    int retval = common_alu_int(sub_opc, vA, v1, v2);
582    rPC += 1;
583    return retval;
584}
585//! lower bytecode MUL_INT_2ADDR by calling common_alu_int
586
587//!
588int op_mul_int_2addr() {
589    u2 vA, v1, v2;
590    vA = INST_A(inst);
591    v1 = vA;
592    v2 = INST_B(inst);
593    int retval = common_alu_int(imul_opc, vA, v1, v2);
594    rPC += 1;
595    return retval;
596}
597//! lower bytecode AND_INT_2ADDR by calling common_alu_int
598
599//!
600int op_and_int_2addr() {
601    u2 vA, v1, v2;
602    vA = INST_A(inst);
603    v1 = vA;
604    v2 = INST_B(inst);
605    int retval = common_alu_int(and_opc, vA, v1, v2);
606    rPC += 1;
607    return retval;
608}
609//! lower bytecode OR_INT_2ADDR by calling common_alu_int
610
611//!
612int op_or_int_2addr() {
613    u2 vA, v1, v2;
614    vA = INST_A(inst);
615    v1 = vA;
616    v2 = INST_B(inst);
617    int retval = common_alu_int(or_opc, vA, v1, v2);
618    rPC += 1;
619    return retval;
620}
621//! lower bytecode XOR_INT_2ADDR by calling common_alu_int
622
623//!
624int op_xor_int_2addr() {
625    u2 vA, v1, v2;
626    vA = INST_A(inst);
627    v1 = vA;
628    v2 = INST_B(inst);
629    int retval = common_alu_int(xor_opc, vA, v1, v2);
630    rPC += 1;
631    return retval;
632}
633//! lower bytecode SHL_INT_2ADDR by calling common_shift_int
634
635//!
636int op_shl_int_2addr() {
637    u2 vA, v1, v2;
638    vA = INST_A(inst);
639    v1 = vA;
640    v2 = INST_B(inst);
641    int retval = common_shift_int(shl_opc, vA, v1, v2);
642    rPC += 1;
643    return retval;
644}
645//! lower bytecode SHR_INT_2ADDR by calling common_shift_int
646
647//!
648int op_shr_int_2addr() {
649    u2 vA, v1, v2;
650    vA = INST_A(inst);
651    v1 = vA;
652    v2 = INST_B(inst);
653    int retval = common_shift_int(sar_opc, vA, v1, v2);
654    rPC += 1;
655    return retval;
656}
657//! lower bytecode USHR_INT_2ADDR by calling common_shift_int
658
659//!
660int op_ushr_int_2addr() {
661    u2 vA, v1, v2;
662    vA = INST_A(inst);
663    v1 = vA;
664    v2 = INST_B(inst);
665    int retval = common_shift_int(shr_opc, vA, v1, v2);
666    rPC += 1;
667    return retval;
668}
669#define P_GPR_1 PhysicalReg_EBX
670//!common code to handle integer DIV & REM, it used GPR
671
672//!The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv
673//!There are two merge points in the control flow for this bytecode
674//!make sure the reg. alloc. state is the same at merge points by calling transferToState
int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) {
    //vA := v1 / v2 (or v1 % v2). Throws on v2 == 0; the MIN_INT / -1 case is
    //special-cased because x86 idiv would raise #DE there.
    //dividend goes to EAX: idiv implicitly divides edx:eax
    get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    //divisor == 0 --> ArithmeticException
    compare_imm_reg(OpndSize_32, 0, 2, false);
    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    /////////////////// handle special cases
    //conditional move 0 to $edx for rem for the two special cases
    //conditional move 0x80000000 to $eax for div
    //handle -1 special case divide error
    compare_imm_reg(OpndSize_32, -1, 2, false);
    conditional_jump(Condition_NE, ".common_div_rem_int_normal", true);
    //handle min int special case divide error
    rememberState(1);
    compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
    transferToState(1);
    conditional_jump(Condition_E, ".common_div_rem_int_special", true);

    insertLabel(".common_div_rem_int_normal", true); //merge point
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else //divide: quotient in %eax
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    rememberState(2);
    unconditional_jump(".common_div_rem_int_okay", true);

    insertLabel(".common_div_rem_int_special", true);
    goToState(1);
    //MIN_INT / -1: Java defines div -> MIN_INT and rem -> 0
    if(isRem)
        set_VR_to_imm(vA, OpndSize_32, 0);
    else
        set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(2);
    insertLabel(".common_div_rem_int_okay", true); //merge point 2
    return 0;
}
715#undef P_GPR_1
716//! lower bytecode DIV_INT by calling common_div_rem_int
717
718//!
719int op_div_int() {
720    u2 vA, v1, v2;
721    vA = INST_AA(inst);
722    v1 = *((u1*)rPC + 2);
723    v2 = *((u1*)rPC + 3);
724    int retval = common_div_rem_int(false, vA, v1, v2);
725    rPC += 2;
726    return retval;
727}
728//! lower bytecode REM_INT by calling common_div_rem_int
729
730//!
731int op_rem_int() {
732    u2 vA, v1, v2;
733    vA = INST_AA(inst);
734    v1 = *((u1*)rPC + 2);
735    v2 = *((u1*)rPC + 3);
736    int retval = common_div_rem_int(true, vA, v1, v2);
737    rPC += 2;
738    return retval;
739}
740//! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int
741
742//!
743int op_div_int_2addr() {
744    u2 vA = INST_A(inst);
745    u2 v1 = vA;
746    u2 v2 = INST_B(inst);
747    int retval = common_div_rem_int(false, vA, v1, v2);
748    rPC += 1;
749    return retval;
750}
751//! lower bytecode REM_INT_2ADDR by calling common_div_rem_int
752
753//!
754int op_rem_int_2addr() {
755    u2 vA = INST_A(inst);
756    u2 v1 = vA;
757    u2 v2 = INST_B(inst);
758    int retval = common_div_rem_int(true, vA, v1, v2);
759    rPC += 1;
760    return retval;
761}
762
763#define P_GPR_1 PhysicalReg_EBX
764//! common code to handle integer ALU ops with literal
765
766//! It uses GPR
767int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem
768    get_virtual_reg(vB, OpndSize_32, 1, false);
769    alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false);
770    set_virtual_reg(vA, OpndSize_32, 1, false);
771    return 0;
772}
773//! calls common_alu_int_lit
774int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) {
775    return common_alu_int_lit(opc, vA, vB, imm);
776}
777#undef p_GPR_1
778//! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit
779
780//!
781int op_add_int_lit16() {
782    u2 vA = INST_A(inst);
783    u2 vB = INST_B(inst);
784    s4 tmp = (s2)FETCH(1);
785    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
786    rPC += 2;
787    return retval;
788}
789
790int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) {
791    move_imm_to_reg(OpndSize_32, imm, 2, false);
792    get_virtual_reg(vB, OpndSize_32, 1, false);
793    alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false);
794    set_virtual_reg(vA, OpndSize_32, 2, false);
795    return 0;
796}
797
798
799//! lower bytecode RSUB_INT by calling common_alu_int_lit
800
801//!
802int op_rsub_int() {
803    u2 vA = INST_A(inst);
804    u2 vB = INST_B(inst);
805    s4 tmp = (s2)FETCH(1);
806    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
807    rPC += 2;
808    return retval;
809}
810//! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit
811
812//!
813int op_mul_int_lit16() {
814    u2 vA = INST_A(inst);
815    u2 vB = INST_B(inst);
816    s4 tmp = (s2)FETCH(1);
817    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
818    rPC += 2;
819    return retval;
820}
821//! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit
822
823//!
824int op_and_int_lit16() {
825    u2 vA = INST_A(inst);
826    u2 vB = INST_B(inst);
827    s4 tmp = (s2)FETCH(1);
828    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
829    rPC += 2;
830    return retval;
831}
832//! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit
833
834//!
835int op_or_int_lit16() {
836    u2 vA = INST_A(inst);
837    u2 vB = INST_B(inst);
838    s4 tmp = (s2)FETCH(1);
839    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
840    rPC += 2;
841    return retval;
842}
843//! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit
844
845//!
846int op_xor_int_lit16() {
847    u2 vA = INST_A(inst);
848    u2 vB = INST_B(inst);
849    s4 tmp = (s2)FETCH(1);
850    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
851    rPC += 2;
852    return retval;
853}
854//! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit
855
856//!
857int op_shl_int_lit16() {
858    u2 vA = INST_A(inst);
859    u2 vB = INST_B(inst);
860    s4 tmp = (s2)FETCH(1);
861    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
862    rPC += 2;
863    return retval;
864}
865//! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit
866
867//!
868int op_shr_int_lit16() {
869    u2 vA = INST_A(inst);
870    u2 vB = INST_B(inst);
871    s4 tmp = (s2)FETCH(1);
872    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
873    rPC += 2;
874    return retval;
875}
876//! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit
877
878//!
879int op_ushr_int_lit16() {
880    u2 vA = INST_A(inst);
881    u2 vB = INST_B(inst);
882    s4 tmp = (s2)FETCH(1);
883    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
884    rPC += 2;
885    return retval;
886}
887//! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit
888
889//!
890int op_add_int_lit8() {
891    u2 vA = INST_AA(inst);
892    u2 vB = (u2)FETCH(1) & 0xff;
893    s2 tmp = (s2)FETCH(1) >> 8;
894    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
895    rPC += 2;
896    return retval;
897}
898//! lower bytecode RSUB_INT_LIT8 by calling common_alu_int_lit
899
900//!
901int op_rsub_int_lit8() {
902    u2 vA = INST_AA(inst);
903    u2 vB = (u2)FETCH(1) & 0xff;
904    s2 tmp = (s2)FETCH(1) >> 8;
905    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
906    rPC += 2;
907    return retval;
908}
909//! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit
910
911//!
912int op_mul_int_lit8() {
913    u2 vA = INST_AA(inst);
914    u2 vB = (u2)FETCH(1) & 0xff;
915    s2 tmp = (s2)FETCH(1) >> 8;
916    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
917    rPC += 2;
918    return retval;
919}
920//! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit
921
922//!
923int op_and_int_lit8() {
924    u2 vA = INST_AA(inst);
925    u2 vB = (u2)FETCH(1) & 0xff;
926    s2 tmp = (s2)FETCH(1) >> 8;
927    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
928    rPC += 2;
929    return retval;
930}
931//! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit
932
933//!
934int op_or_int_lit8() {
935    u2 vA = INST_AA(inst);
936    u2 vB = (u2)FETCH(1) & 0xff;
937    s2 tmp = (s2)FETCH(1) >> 8;
938    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
939    rPC += 2;
940    return retval;
941}
942//! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit
943
944//!
945int op_xor_int_lit8() {
946    u2 vA = INST_AA(inst);
947    u2 vB = (u2)FETCH(1) & 0xff;
948    s2 tmp = (s2)FETCH(1) >> 8;
949    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
950    rPC += 2;
951    return retval;
952}
953//! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit
954
955//!
956int op_shl_int_lit8() {
957    u2 vA = INST_AA(inst);
958    u2 vB = (u2)FETCH(1) & 0xff;
959    s2 tmp = (s2)FETCH(1) >> 8;
960    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
961    rPC += 2;
962    return retval;
963}
964//! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit
965
966//!
967int op_shr_int_lit8() {
968    u2 vA = INST_AA(inst);
969    u2 vB = (u2)FETCH(1) & 0xff;
970    s2 tmp = (s2)FETCH(1) >> 8;
971    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
972    rPC += 2;
973    return retval;
974}
975//! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit
976
977//!
978int op_ushr_int_lit8() {
979    u2 vA = INST_AA(inst);
980    u2 vB = (u2)FETCH(1) & 0xff;
981    s2 tmp = (s2)FETCH(1) >> 8;
982    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
983    rPC += 2;
984    return retval;
985}
986
//! Map a power-of-two immediate to its shift amount.
//! Returns k when imm == 2^k for k in [1,16]; otherwise -1.
//! 2^0 == 1 is deliberately excluded (a divide by 1 needs no reduction).
int isPowerOfTwo(int imm) {
    int shift = 16;
    while(shift >= 1) {
        if(imm == (1 << shift)) return shift;
        shift--;
    }
    return -1;
}
994
995#define P_GPR_1 PhysicalReg_EBX
//! Strength-reduce DIV_INT by a power-of-two literal into a shift sequence.
//! Returns 1 when the reduced code was emitted, 0 when the caller must fall
//! back to idiv. Only active in the NcgO1 execution mode.
int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) {
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //strength reduction for div by 2,4,8,...
        int power = isPowerOfTwo(imm);
        if(power < 1) return 0; //isPowerOfTwo returns -1 for non-powers
        //tmp2 is not updated, so it can share with vB
        get_virtual_reg(vB, OpndSize_32, 2, false);
        //if imm is 2, power will be 1
        if(power == 1) {
            /* mov tmp1, tmp2
               shrl $31, tmp1
               addl tmp2, tmp1
               sarl $1, tmp1 */
            //add the sign bit as a bias so the shift rounds toward zero
            //for negative dividends (matching Java division semantics)
            move_reg_to_reg(OpndSize_32, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false);
            alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false);
            set_virtual_reg(vA, OpndSize_32, 1, false);
            return 1;
        }
        //power > 1
        /* mov tmp1, tmp2
           sarl $power-1, tmp1
           shrl 32-$power, tmp1
           addl tmp2, tmp1
           sarl $power, tmp1 */
        //sar/shr pair builds the bias (2^power - 1 for negative dividends,
        //0 for non-negative) so the final sar truncates toward zero
        move_reg_to_reg(OpndSize_32, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false);
        alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false);
        alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false);
        set_virtual_reg(vA, OpndSize_32, 1, false);
        return 1;
    }
    return 0;
}
1032
////////// throws exception!!!
//! common code to handle integer DIV & REM with literal

//! It uses GPR. A literal divisor of 0 jumps straight to
//! common_errDivideByZero. imm == -1 is special-cased so that
//! 0x80000000 / -1 (which would fault in idiv) yields 0x80000000 for
//! DIV and 0 for REM, per the Dalvik semantics.
int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) {
    if(!isRem) {
        //DIV only: try the shift-based fast path first
        int retCode = div_lit_strength_reduction(vA, vB, imm);
        if(retCode > 0) return 0;
    }
    if(imm == 0) {
        //literal zero divisor: unconditionally throw; no divide is emitted
        export_pc(); //use %edx
#ifdef DEBUG_EXCEPTION
        LOGI("EXTRA code to handle exception");
#endif
        constVREndOfBB();
        beforeCall("exception"); //dump GG, GL VRs
        unconditional_jump_global_API(
                          "common_errDivideByZero", false);

        return 0;
    }
    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    //check against -1 for DIV_INT??
    if(imm == -1) {
        //0x80000000 / -1 overflows idiv; branch to the special case
        compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
        conditional_jump(Condition_E, ".div_rem_int_lit_special", true);
        rememberState(1);
    }
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);

    if(imm == -1) {
        unconditional_jump(".div_rem_int_lit_okay", true);
        rememberState(2);
        insertLabel(".div_rem_int_lit_special", true);
        goToState(1);
        //INT_MIN / -1: remainder is 0, quotient stays INT_MIN
        if(isRem)
            set_VR_to_imm(vA, OpndSize_32, 0);
        else
            set_VR_to_imm(vA, OpndSize_32, 0x80000000);
        transferToState(2);
    }

    insertLabel(".div_rem_int_lit_okay", true); //merge point 2
    return 0;
}
1085#undef P_GPR_1
1086//! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit
1087
1088//!
1089int op_div_int_lit16() {
1090    u2 vA = INST_A(inst);
1091    u2 vB = INST_B(inst);
1092    s4 tmp = (s2)FETCH(1);
1093    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
1094    rPC += 2;
1095    return retval;
1096}
1097//! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit
1098
1099//!
1100int op_rem_int_lit16() {
1101    u2 vA = INST_A(inst);
1102    u2 vB = INST_B(inst);
1103    s4 tmp = (s2)FETCH(1);
1104    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
1105    rPC += 2;
1106    return retval;
1107}
1108//! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit
1109
1110//!
1111int op_div_int_lit8() {
1112    u2 vA = INST_AA(inst);
1113    u2 vB = (u2)FETCH(1) & 0xff;
1114    s2 tmp = (s2)FETCH(1) >> 8;
1115    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
1116    rPC += 2;
1117    return retval;
1118}
1119//! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit
1120
1121//!
1122int op_rem_int_lit8() {
1123    u2 vA = INST_AA(inst);
1124    u2 vB = (u2)FETCH(1) & 0xff;
1125    s2 tmp = (s2)FETCH(1) >> 8;
1126    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
1127    rPC += 2;
1128    return retval;
1129}
//! common code to handle long ALU ops (except div and rem)

//! It uses XMM: loads both 64-bit VR pairs, applies opc (v1 op= v2),
//! and stores the result to vA.
int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    get_virtual_reg(v1, OpndSize_64, 1, false);
    get_virtual_reg(v2, OpndSize_64, 2, false);
    alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
1140//! lower bytecode ADD_LONG by calling common_alu_long
1141
1142//!
1143int op_add_long() {
1144    u2 vA = INST_AA(inst);
1145    u2 v1 = *((u1*)rPC + 2);
1146    u2 v2 = *((u1*)rPC + 3);
1147    int retval = common_alu_long(add_opc, vA, v1, v2);
1148    rPC += 2;
1149    return retval;
1150}
1151//! lower bytecode SUB_LONG by calling common_alu_long
1152
1153//!
1154int op_sub_long() {
1155    u2 vA = INST_AA(inst);
1156    u2 v1 = *((u1*)rPC + 2);
1157    u2 v2 = *((u1*)rPC + 3);
1158    int retval = common_alu_long(sub_opc, vA, v1, v2);
1159    rPC += 2;
1160    return retval;
1161}
1162//! lower bytecode AND_LONG by calling common_alu_long
1163
1164//!
1165int op_and_long() {
1166    u2 vA = INST_AA(inst);
1167    u2 v1 = *((u1*)rPC + 2);
1168    u2 v2 = *((u1*)rPC + 3);
1169    int retval = common_alu_long(and_opc, vA, v1, v2);
1170    rPC += 2;
1171    return retval;
1172}
1173//! lower bytecode OR_LONG by calling common_alu_long
1174
1175//!
1176int op_or_long() {
1177    u2 vA = INST_AA(inst);
1178    u2 v1 = *((u1*)rPC + 2);
1179    u2 v2 = *((u1*)rPC + 3);
1180    int retval = common_alu_long(or_opc, vA, v1, v2);
1181    rPC += 2;
1182    return retval;
1183}
1184//! lower bytecode XOR_LONG by calling common_alu_long
1185
1186//!
1187int op_xor_long() {
1188    u2 vA = INST_AA(inst);
1189    u2 v1 = *((u1*)rPC + 2);
1190    u2 v2 = *((u1*)rPC + 3);
1191    int retval = common_alu_long(xor_opc, vA, v1, v2);
1192    rPC += 2;
1193    return retval;
1194}
1195//! lower bytecode ADD_LONG_2ADDR by calling common_alu_long
1196
1197//!
1198int op_add_long_2addr() {
1199    u2 vA = INST_A(inst);
1200    u2 v1 = vA;
1201    u2 v2 = INST_B(inst);
1202    int retval = common_alu_long(add_opc, vA, v1, v2);
1203    rPC += 1;
1204    return retval;
1205}
1206//! lower bytecode SUB_LONG_2ADDR by calling common_alu_long
1207
1208//!
1209int op_sub_long_2addr() {
1210    u2 vA = INST_A(inst);
1211    u2 v1 = vA;
1212    u2 v2 = INST_B(inst);
1213    int retval = common_alu_long(sub_opc, vA, v1, v2);
1214    rPC += 1;
1215    return retval;
1216}
1217//! lower bytecode AND_LONG_2ADDR by calling common_alu_long
1218
1219//!
1220int op_and_long_2addr() {
1221    u2 vA = INST_A(inst);
1222    u2 v1 = vA;
1223    u2 v2 = INST_B(inst);
1224    int retval = common_alu_long(and_opc, vA, v1, v2);
1225    rPC += 1;
1226    return retval;
1227}
1228//! lower bytecode OR_LONG_2ADDR by calling common_alu_long
1229
1230//!
1231int op_or_long_2addr() {
1232    u2 vA = INST_A(inst);
1233    u2 v1 = vA;
1234    u2 v2 = INST_B(inst);
1235    int retval = common_alu_long(or_opc, vA, v1, v2);
1236    rPC += 1;
1237    return retval;
1238}
1239//! lower bytecode XOR_LONG_2ADDR by calling common_alu_long
1240
1241//!
1242int op_xor_long_2addr() {
1243    u2 vA = INST_A(inst);
1244    u2 v1 = vA;
1245    u2 v2 = INST_B(inst);
1246    int retval = common_alu_long(xor_opc, vA, v1, v2);
1247    rPC += 1;
1248    return retval;
1249}
1250
//signed vs unsigned imul and mul?
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
#define P_GPR_3 PhysicalReg_ESI
//! common code to handle multiplication of long

//! It uses GPR. Classic 64x64->64 multiply from 32-bit pieces:
//!   result = ((v1L*v2H + v1H*v2L) << 32) + widening(v1L * v2L)
int common_mul_long(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);
    //v2 low word is also needed in EAX for the widening mul below
    move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true);
    //imul: 2L * 1H update temporary 1
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false);
    get_virtual_reg(v1, OpndSize_32, 3, false);
    move_reg_to_reg(OpndSize_32, 3, false, 2, false);
    //imul: 1L * 2H
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false);
    //sum of the two cross products = high-word contribution
    alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
    //mul: edx:eax = eax (v2L) * tmp3 (v1L), unsigned widening
    alu_unary_reg(OpndSize_32, mul_opc, 3, false);
    alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false);
    set_virtual_reg(vA+1, OpndSize_32, 1, false);
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    return 0;
}
1274#undef P_GPR_1
1275#undef P_GPR_2
1276#undef P_GPR_3
1277//! lower bytecode MUL_LONG by calling common_mul_long
1278
1279//!
1280int op_mul_long() {
1281    u2 vA = INST_AA(inst);
1282    u2 v1 = *((u1*)rPC + 2);
1283    u2 v2 = *((u1*)rPC + 3);
1284    int retval = common_mul_long(vA, v1, v2);
1285    rPC += 2;
1286    return retval;
1287}
1288//! lower bytecode MUL_LONG_2ADDR by calling common_mul_long
1289
1290//!
1291int op_mul_long_2addr() {
1292    u2 vA = INST_A(inst);
1293    u2 v1 = vA;
1294    u2 v2 = INST_B(inst);
1295    int retval = common_mul_long(vA, v1, v2);
1296    rPC += 1;
1297    return retval;
1298}
1299
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
//! common code to handle DIV & REM of long

//! It uses GPR & XMM; and calls call_moddi3 & call_divdi3.
//! Jumps to common_errDivideByZero when the 64-bit divisor is zero.
//! Arguments are passed to the runtime helper on the native stack:
//! dividend at esp+0..7, divisor at esp+8..15.
int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);
    get_virtual_reg(v2+1, OpndSize_32, 2, false);
    //save to native stack before changing register P_GPR_1
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true);
    //low | high == 0  <=>  the 64-bit divisor is zero
    alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false);

    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true);
    get_virtual_reg(v1, OpndSize_64, 1, false);
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs
    if(isRem)
        call_moddi3();
    else
        call_divdi3();
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    //64-bit result is returned in edx:eax
    set_virtual_reg(vA+1, OpndSize_32,PhysicalReg_EDX, true);
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    return 0;
}
1330#undef P_GPR_1
1331#undef P_GPR_2
1332//! lower bytecode DIV_LONG by calling common_div_rem_long
1333
1334//!
1335int op_div_long() {
1336    u2 vA = INST_AA(inst);
1337    u2 v1 = *((u1*)rPC + 2);
1338    u2 v2 = *((u1*)rPC + 3);
1339    int retval = common_div_rem_long(false, vA, v1, v2);
1340    rPC += 2;
1341    return retval;
1342}
1343//! lower bytecode REM_LONG by calling common_div_rem_long
1344
1345//!
1346int op_rem_long() {
1347    u2 vA = INST_AA(inst);
1348    u2 v1 = *((u1*)rPC + 2);
1349    u2 v2 = *((u1*)rPC + 3);
1350    int retval = common_div_rem_long(true, vA, v1, v2);
1351    rPC += 2;
1352    return retval;
1353}
1354//! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long
1355
1356//!
1357int op_div_long_2addr() {
1358    u2 vA = INST_A(inst);
1359    u2 v1 = vA;
1360    u2 v2 = INST_B(inst);
1361    int retval = common_div_rem_long(false, vA, v1, v2);
1362    rPC += 1;
1363    return retval;
1364}
1365//! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long
1366
1367//!
1368int op_rem_long_2addr() { //call __moddi3 instead of __divdi3
1369    u2 vA = INST_A(inst);
1370    u2 v1 = vA;
1371    u2 v2 = INST_B(inst);
1372    int retval = common_div_rem_long(true, vA, v1, v2);
1373    rPC += 1;
1374    return retval;
1375}
1376
//! common code to handle SHL long

//! It uses XMM. The shift count is masked with the 64-bit "shiftMask"
//! global (presumably keeping the low 6 bits — TODO confirm against the
//! global data definition) before the 64-bit logical left shift.
int common_shl_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false);

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
1391
//! common code to handle SHR long (arithmetic shift right)

//! It uses XMM. SSE only provides a 64-bit logical right shift, so after
//! srl the sign bits are filled in manually when v1 is negative:
//! the "value64"/"64bits" globals are presumably 64 and an all-ones
//! pattern (TODO confirm) used to build (~0 << (64 - count)).
int common_shr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false);

    //mask the shift count
    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false);
    //high word >= 0: the logical result is already correct, skip the fill
    compare_imm_VR(OpndSize_32, 0, (v1+1));
    conditional_jump(Condition_GE, ".common_shr_long_special", true);
    rememberState(1);

    //negative input: OR in the high sign bits
    load_global_data_API("value64", OpndSize_64, 4, false);

    alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false);

    load_global_data_API("64bits", OpndSize_64, 5, false);

    alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false);
    alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false);
    rememberState(2);
    //check whether the target is next instruction TODO
    unconditional_jump(".common_shr_long_done", true);

    insertLabel(".common_shr_long_special", true);
    goToState(1);
    transferToState(2);
    insertLabel(".common_shr_long_done", true);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
1426
//! common code to handle USHR long (logical shift right)

//! It uses XMM. No sign fill is needed: the masked count feeds a
//! 64-bit logical right shift directly.
int common_ushr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_sd(v1, 1, false);
    get_VR_ss(v2, 2, false);

    //mask the shift count
    load_sd_global_data_API("shiftMask", 3, false);

    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false);
    set_VR_sd(vA, 1, false);
    return 0;
}
1441//! lower bytecode SHL_LONG by calling common_shl_long
1442
1443//!
1444int op_shl_long() {
1445    u2 vA = INST_AA(inst);
1446    u2 v1 = *((u1*)rPC + 2);
1447    u2 v2 = *((u1*)rPC + 3);
1448    int retval = common_shl_long(vA, v1, v2);
1449    rPC += 2;
1450    return retval;
1451}
1452//! lower bytecode SHL_LONG_2ADDR by calling common_shl_long
1453
1454//!
1455int op_shl_long_2addr() {
1456    u2 vA = INST_A(inst);
1457    u2 v1 = vA;
1458    u2 v2 = INST_B(inst);
1459    int retval = common_shl_long(vA, v1, v2);
1460    rPC += 1;
1461    return retval;
1462}
1463//! lower bytecode SHR_LONG by calling common_shr_long
1464
1465//!
1466int op_shr_long() {
1467    u2 vA = INST_AA(inst);
1468    u2 v1 = *((u1*)rPC + 2);
1469    u2 v2 = *((u1*)rPC + 3);
1470    int retval = common_shr_long(vA, v1, v2);
1471    rPC += 2;
1472    return retval;
1473}
1474//! lower bytecode SHR_LONG_2ADDR by calling common_shr_long
1475
1476//!
1477int op_shr_long_2addr() {
1478    u2 vA = INST_A(inst);
1479    u2 v1 = vA;
1480    u2 v2 = INST_B(inst);
1481    int retval = common_shr_long(vA, v1, v2);
1482    rPC += 1;
1483    return retval;
1484}
1485//! lower bytecode USHR_LONG by calling common_ushr_long
1486
1487//!
1488int op_ushr_long() {
1489    u2 vA = INST_AA(inst);
1490    u2 v1 = *((u1*)rPC + 2);
1491    u2 v2 = *((u1*)rPC + 3);
1492    int retval = common_ushr_long(vA, v1, v2);
1493    rPC += 2;
1494    return retval;
1495}
1496//! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long
1497
1498//!
1499int op_ushr_long_2addr() {
1500    u2 vA = INST_A(inst);
1501    u2 v1 = vA;
1502    u2 v2 = INST_B(inst);
1503    int retval = common_ushr_long(vA, v1, v2);
1504    rPC += 1;
1505    return retval;
1506}
1507#define USE_MEM_OPERAND
1508///////////////////////////////////////////
//! common code to handle ALU of floats

//! It uses XMM (scalar single). With USE_MEM_OPERAND the second operand
//! is taken directly from the VR's memory home instead of being loaded
//! into a second temporary first.
int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_ss(v1, 1, false);
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/);
#else
    get_VR_ss(v2, 2, false);
    alu_ss_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_ss(vA, 1, false);
    return 0;
}
1523//! lower bytecode ADD_FLOAT by calling common_alu_float
1524
1525//!
1526int op_add_float() {
1527    u2 vA = INST_AA(inst);
1528    u2 v1 = *((u1*)rPC + 2);
1529    u2 v2 = *((u1*)rPC + 3);
1530    int retval = common_alu_float(add_opc, vA, v1, v2);
1531    rPC += 2;
1532    return retval;
1533}
1534//! lower bytecode SUB_FLOAT by calling common_alu_float
1535
1536//!
1537int op_sub_float() {
1538    u2 vA = INST_AA(inst);
1539    u2 v1 = *((u1*)rPC + 2);
1540    u2 v2 = *((u1*)rPC + 3);
1541    int retval = common_alu_float(sub_opc, vA, v1, v2);
1542    rPC += 2;
1543    return retval;
1544}
1545//! lower bytecode MUL_FLOAT by calling common_alu_float
1546
1547//!
1548int op_mul_float() {
1549    u2 vA = INST_AA(inst);
1550    u2 v1 = *((u1*)rPC + 2);
1551    u2 v2 = *((u1*)rPC + 3);
1552    int retval = common_alu_float(mul_opc, vA, v1, v2);
1553    rPC += 2;
1554    return retval;
1555}
1556//! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float
1557
1558//!
1559int op_add_float_2addr() {
1560    u2 vA = INST_A(inst);
1561    u2 v1 = vA;
1562    u2 v2 = INST_B(inst);
1563    int retval = common_alu_float(add_opc, vA, v1, v2);
1564    rPC += 1;
1565    return retval;
1566}
1567//! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float
1568
1569//!
1570int op_sub_float_2addr() {
1571    u2 vA = INST_A(inst);
1572    u2 v1 = vA;
1573    u2 v2 = INST_B(inst);
1574    int retval = common_alu_float(sub_opc, vA, v1, v2);
1575    rPC += 1;
1576    return retval;
1577}
1578//! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float
1579
1580//!
1581int op_mul_float_2addr() {
1582    u2 vA = INST_A(inst);
1583    u2 v1 = vA;
1584    u2 v2 = INST_B(inst);
1585    int retval = common_alu_float(mul_opc, vA, v1, v2);
1586    rPC += 1;
1587    return retval;
1588}
//! common code to handle DIV of float

//! It uses the x87 FP stack (flds/fdivs/fstps), unlike the other float
//! ALU ops which use SSE. NOTE(review): the DIV_FLOAT opcode handlers in
//! this file call common_alu_float(div_opc, ...) instead; this helper
//! appears to be an unused alternative path.
int common_div_float(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_32, v1); //flds
    fpu_VR(div_opc, OpndSize_32, v2);
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
//! lower bytecode DIV_FLOAT by calling common_alu_float with div_opc

//! (Note: despite its doc history, this uses the SSE path, not
//! common_div_float.)
int op_div_float() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_float(div_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode DIV_FLOAT_2ADDR by calling common_alu_float with div_opc

//! 2addr form: the destination doubles as the dividend.
int op_div_float_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_float(div_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! common code to handle ALU ops of double (add, sub, mul, div)

//! It uses XMM (scalar double). With USE_MEM_OPERAND the second operand
//! is taken directly from the VR's memory home.
int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_sd(v1, 1, false);
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/);
#else
    get_VR_sd(v2, 2, false);
    alu_sd_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_sd(vA, 1, false);
    return 0;
}
1634//! lower bytecode ADD_DOUBLE by calling common_alu_double
1635
1636//!
1637int op_add_double() {
1638    u2 vA = INST_AA(inst);
1639    u2 v1 = *((u1*)rPC + 2);
1640    u2 v2 = *((u1*)rPC + 3);
1641    int retval = common_alu_double(add_opc, vA, v1, v2);
1642    rPC += 2;
1643    return retval;
1644}
1645//! lower bytecode SUB_DOUBLE by calling common_alu_double
1646
1647//!
1648int op_sub_double() {
1649    u2 vA = INST_AA(inst);
1650    u2 v1 = *((u1*)rPC + 2);
1651    u2 v2 = *((u1*)rPC + 3);
1652    int retval = common_alu_double(sub_opc, vA, v1, v2);
1653    rPC += 2;
1654    return retval;
1655}
1656//! lower bytecode MUL_DOUBLE by calling common_alu_double
1657
1658//!
1659int op_mul_double() {
1660    u2 vA = INST_AA(inst);
1661    u2 v1 = *((u1*)rPC + 2);
1662    u2 v2 = *((u1*)rPC + 3);
1663    int retval = common_alu_double(mul_opc, vA, v1, v2);
1664    rPC += 2;
1665    return retval;
1666}
1667//! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double
1668
1669//!
1670int op_add_double_2addr() {
1671    u2 vA = INST_A(inst);
1672    u2 v1 = vA;
1673    u2 v2 = INST_B(inst);
1674    int retval = common_alu_double(add_opc, vA, v1, v2);
1675    rPC += 1;
1676    return retval;
1677}
1678//! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double
1679
1680//!
1681int op_sub_double_2addr() {
1682    u2 vA = INST_A(inst);
1683    u2 v1 = vA;
1684    u2 v2 = INST_B(inst);
1685    int retval = common_alu_double(sub_opc, vA, v1, v2);
1686    rPC += 1;
1687    return retval;
1688}
1689//! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double
1690
1691//!
1692int op_mul_double_2addr() {
1693    u2 vA = INST_A(inst);
1694    u2 v1 = vA;
1695    u2 v2 = INST_B(inst);
1696    int retval = common_alu_double(mul_opc, vA, v1, v2);
1697    rPC += 1;
1698    return retval;
1699}
//! common code to handle DIV of double

//! It uses the x87 FP stack (fldl/fdivl/fstpl). NOTE(review): the
//! DIV_DOUBLE opcode handlers in this file call
//! common_alu_double(div_opc, ...) instead; this helper appears to be
//! an unused alternative path.
int common_div_double(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_64, v1); //fldl
    fpu_VR(div_opc, OpndSize_64, v2); //fdivl
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
//! lower bytecode DIV_DOUBLE by calling common_alu_double with div_opc

//! (Note: uses the SSE path, not common_div_double.)
int op_div_double() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_double(div_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode DIV_DOUBLE_2ADDR by calling common_alu_double with div_opc

//! 2addr form: the destination pair doubles as the dividend.
int op_div_double_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_double(div_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
//! common code to handle REM of float

//! It uses GPR & calls call_fmodf. Arguments go on the native stack
//! (x at esp+0, y at esp+4); fmodf's result comes back on the x87
//! stack and is stored with fstps.
int common_rem_float(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_32, 1, false);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmodf(); //(float x, float y) return float
    load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
1748#undef P_GPR_1
1749#undef P_GPR_2
1750//! lower bytecode REM_FLOAT by calling common_rem_float
1751
1752//!
1753int op_rem_float() {
1754    u2 vA = INST_AA(inst);
1755    u2 v1 = *((u1*)rPC + 2);
1756    u2 v2 = *((u1*)rPC + 3);
1757    int retval = common_rem_float(vA, v1, v2);
1758    rPC += 2;
1759    return retval;
1760}
1761//! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float
1762
1763//!
1764int op_rem_float_2addr() {
1765    u2 vA = INST_A(inst);
1766    u2 v1 = vA;
1767    u2 v2 = INST_B(inst);
1768    int retval = common_rem_float(vA, v1, v2);
1769    rPC += 1;
1770    return retval;
1771}
//! common code to handle REM of double

//! It uses XMM & calls call_fmod. Arguments go on the native stack
//! (x at esp+0, y at esp+8); fmod's result comes back on the x87
//! stack and is stored with fstpl.
int common_rem_double(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_64, 1, false);
    get_virtual_reg(v2, OpndSize_64, 2, false);
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmod(); //(long double x, long double y) return double
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
1787//! lower bytecode REM_DOUBLE by calling common_rem_double
1788
1789//!
1790int op_rem_double() {
1791    u2 vA = INST_AA(inst);
1792    u2 v1 = *((u1*)rPC + 2);
1793    u2 v2 = *((u1*)rPC + 3);
1794    int retval = common_rem_double(vA, v1, v2);
1795    rPC += 2;
1796    return retval;
1797}
1798//! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double
1799
1800//!
1801int op_rem_double_2addr() {
1802    u2 vA = INST_A(inst);
1803    u2 v1 = vA;
1804    u2 v2 = INST_B(inst);
1805    int retval = common_rem_double(vA, v1, v2);
1806    rPC += 1;
1807    return retval;
1808}
//! lower bytecode CMPL_FLOAT

//! Dalvik contract: vA := 1 if v1 > v2, 0 if equal, -1 if v1 < v2 or
//! either is NaN (the "l" bias). Implemented branch-free with an SSE
//! compare followed by cmovs.
int op_cmpl_float() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_ss(v1, 1, false); //xmm
    //GPR temps hold the candidate results: 1 = 0, 2 = 1, 3 = -1
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    compare_VR_ss_reg(v2, 1, false);
    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32,
                                 0xffffffff, 4, false);
    //ORDER of cmov matters !!! (Z,P,A)
    //unordered sets ZF, so the P cmov must come after Z to force -1 on NaN
    //finalNaN: unordered 0xffffffff
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false);
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
//! lower bytecode CMPG_FLOAT

//! Dalvik contract: vA := 1 if v1 > v2 or either is NaN (the "g" bias),
//! 0 if equal, -1 if v1 < v2. Branch-free via SSE compare plus cmovs.
int op_cmpg_float() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_ss(v1, 1, false);
    compare_VR_ss_reg(v2, 1, false);
    //candidate results: 1 = 0, 2 = 1; temp 3 starts at the default -1
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                1, false, 3, false);
    //finalNaN: unordered -> 1 (P cmov overrides the Z result on NaN)
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                2, false, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                2, false, 3, false);
    set_virtual_reg(vA, OpndSize_32, 3, false);
    rPC += 2;
    return 0;
}
//! lower bytecode CMPL_DOUBLE

//! Dalvik contract: vA := 1 if v1 > v2, 0 if equal, -1 if v1 < v2 or
//! either is NaN (the "l" bias). Same cmov scheme as op_cmpl_float.
int op_cmpl_double() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_sd(v1, 1, false);
    compare_VR_sd_reg(v2, 1, false);
    //candidate results: 1 = 0, 2 = 1, 3 = -1
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);

    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false);
    //cmov order (Z,P,A): P after Z forces -1 on unordered (NaN)
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false);
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
//! lower bytecode CMPG_DOUBLE

//! Dalvik contract: vA := 1 if v1 > v2 or either is NaN (the "g" bias),
//! 0 if equal, -1 if v1 < v2. Same cmov scheme as op_cmpg_float.
int op_cmpg_double() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_VR_sd(v1, 1, false);
    compare_VR_sd_reg(v2, 1, false);
    //candidate results: 1 = 0, 2 = 1; temp 3 starts at the default -1
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);

    //default: 0xffffffff??
    move_imm_to_reg(OpndSize_32,
                                 0xffffffff, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 3, false);
    //finalNaN: unordered -> 1
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             2, false, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 3, false);
   set_virtual_reg(vA, OpndSize_32, 3, false);
    rPC += 2;
    return 0;
}
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
#define P_GPR_3 PhysicalReg_ESI
#define P_SCRATCH_1 PhysicalReg_EDX
#define P_SCRATCH_2 PhysicalReg_EAX
#define OPTION_OLD //for simpler cfg
//! lower bytecode CMP_LONG

//! vA := -1, 0 or 1 from a 64-bit signed compare of v1 and v2: high
//! words are compared signed first; only when equal do the low words
//! decide via an unsigned compare. OPTION_OLD selects the cmov-based
//! variant with a simpler control-flow graph.
int op_cmp_long() {
    u2 vA = INST_AA(inst);
    u4 v1 = FETCH(1) & 0xff;
    u4 v2 = FETCH(1) >> 8;
    get_virtual_reg(v1+1, OpndSize_32, 2, false);
#ifdef OPTION_OLD
    //candidate results: 3 = -1, 4 = 1, 5 = 0
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    move_imm_to_reg(OpndSize_32, 1, 4, false);
    move_imm_to_reg(OpndSize_32, 0, 5, false);
#endif
    //signed compare of the high words
    compare_VR_reg(OpndSize_32,
                                v2+1, 2, false);
#ifndef OPTION_OLD
    conditional_jump(Condition_L, ".cmp_long_less", true);
    conditional_jump(Condition_G, ".cmp_long_greater", true);
#else
    conditional_jump(Condition_E, ".cmp_long_equal", true);
    rememberState(1);
    //high words differ: signed conditions pick the result
    conditional_move_reg_to_reg(OpndSize_32, Condition_L, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_G, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    rememberState(2);
    unconditional_jump(".cmp_long_okay", true);
    insertLabel(".cmp_long_equal", true);
    goToState(1);
#endif

    //high words equal: unsigned compare of the low words decides
    get_virtual_reg(v1, OpndSize_32, 1, false);
    compare_VR_reg(OpndSize_32,
                                v2, 1, false);
#ifdef OPTION_OLD
    conditional_move_reg_to_reg(OpndSize_32, Condition_E,
                                             5, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_B, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    transferToState(2);
#else
    conditional_jump(Condition_A, ".cmp_long_greater", true);
    conditional_jump(Condition_NE, ".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0xffffffff);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_greater", true);
    set_VR_to_imm(vA, OpndSize_32, 1);
#endif
    insertLabel(".cmp_long_okay", true);
    rPC += 2;
    return 0;
}
1978#undef P_GPR_1
1979#undef P_GPR_2
1980#undef P_GPR_3
1981