/*
 * Signed 64-bit integer multiply.
 *
 * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
 *
 *         WX
 *       x YZ
 *   --------
 *      ZW ZX
 *   YW YX
 *
 * The low word of the result holds ZX, the high word holds
 * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
 * it doesn't fit in the low 64 bits.
 *
 * Only the low 64 bits of the product are kept, and those bits are the
 * same for signed and unsigned operands, which is why the unsigned
 * umull is sufficient for the ZxX partial product.
 *
 * Unlike most ARM math operations, multiply instructions have
 * restrictions on using the same register more than once (Rd and Rm
 * cannot be the same).
 *
 * Register use inside the handler:
 *   r0/r1  X/W  (low/high words of vBB)
 *   r2/r3  Z/Y  (low/high words of vCC)
 *   ip     ZxW partial product, later reused for the next opcode
 *   lr     high word of ZxX (used as scratch and overwritten)
 *   r1/r2  final 64-bit result (low/high)
 *
 * The order of the three multiplies matters: "mul" must read W from r1
 * before "umull" overwrites r1, and "mla" may only overwrite r2 after
 * "umull" has consumed Z from it.
 */
    /* mul-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
    mul     ip, r2, r1                  @ ip<- ZxW (last read of W in r1)
    umull   r1, lr, r2, r0              @ r1/lr <- ZxX (r1 = low result word)
    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
    mov     r0, rINST, lsr #8           @ r0<- AA
    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
    GOTO_OPCODE ip                      @ jump to next instruction