1%verify "executed"
2%verify "basic lt, gt, eq"
3%verify "hi equal, lo <=>"
4%verify "lo equal, hi <=>"
5    /*
6     * Compare two signed 64-bit values: vAA<- -1 if vBB < vCC, 0 if they
7     * are equal, 1 if vBB > vCC.
8     *
9     * We load the full values with LDM, but in practice many values could
10     * be resolved by only looking at the high word.  This could be made
11     * faster or slower by splitting the LDM into a pair of LDRs.
12     *
13     * If we just wanted to set condition flags, we could do this:
14     *  subs    ip, r0, r2
15     *  sbcs    ip, r1, r3
16     *  subeqs  ip, r0, r2
17     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
18     * integer value, which we can do with 2 conditional mov/mvn instructions
19     * (set 1, set -1; if they're equal we already have 0 in ip), giving
20     * us a constant 5-cycle path plus a branch at the end to the
21     * instruction epilogue code.  The multi-compare approach below needs
22     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
23     * in the worst case (the 64-bit values are equal).
24     */
25    /* cmp-long vAA, vBB, vCC */
26    FETCH(r0, 1)                        @ r0<- CCBB
27    mov     r9, rINST, lsr #8           @ r9<- AA (kept for SET_VREG in the tail)
28    and     r2, r0, #255                @ r2<- BB
29    mov     r3, r0, lsr #8              @ r3<- CC
30    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
31    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
32    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1 (low/high word)
33    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1 (low/high word)
34    cmp     r1, r3                      @ compare high words (vBB+1, vCC+1)
35    blt     .L${opcode}_less            @ signed compare on high part
36    bgt     .L${opcode}_greater         @ high word of vBB is greater (signed)
37    subs    r1, r0, r2                  @ high words equal; r1<- r0 - r2, sets flags
38    bhi     .L${opcode}_greater         @ unsigned compare on low part
39    bne     .L${opcode}_less            @ not higher and not equal: low word is lower
40    b       .L${opcode}_finish          @ equal; r1 already holds 0
41%break
42
43.L${opcode}_less:
44    mvn     r1, #0                      @ r1<- -1 (vBB < vCC)
45    @ A conditionally executed mov that avoided the branch would be nicer, but
46    @ none is apparent; instead, we just replicate the tail end of _finish.
47    FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
48    SET_VREG(r1, r9)                    @ vAA<- r1 (-1)
49    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
50    GOTO_OPCODE(ip)                     @ jump to next instruction
51
52.L${opcode}_greater:
53    mov     r1, #1                      @ r1<- 1 (vBB > vCC)
54    @ fall through to _finish
55
56.L${opcode}_finish:
57    FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
58    SET_VREG(r1, r9)                    @ vAA<- r1 (1 from _greater, 0 if equal)
59    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
60    GOTO_OPCODE(ip)                     @ jump to next instruction
61