/* omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S revision 22e06318d04074d1a7c90caa173857abdcfd153e */
/*
 * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
 *
 */

/*
 * omxVCM4P10_DequantTransformResidualFromPairAndAdd
 *
 * Target : 32-bit ARM state with NEON, GNU assembler (divided syntax).
 *
 * Purpose: rebuild one 4x4 block of 16-bit residuals and add it to a 4x4
 *          block of 8-bit prediction samples, storing the result saturated
 *          to unsigned 8 bits.
 *
 * Inputs (AAPCS; argument names follow the OpenMAX DL prototype -- confirm
 * against the project header):
 *   r0          ppSrc     handed unchanged to armVCM4P10_UnpackBlock4x4
 *   r1          pPred     prediction rows, read as 4 words, row pitch predStep
 *   r2          pDC       optional pointer to a signed 16-bit DC value;
 *                         may be NULL only on the AC path, the DC-only path
 *                         dereferences it unconditionally
 *   r3          pDst      destination rows, 4 bytes each, row pitch dstStep
 *   [sp,#0]     predStep
 *   [sp,#4]     dstStep
 *   [sp,#8]     QP        byte index into armVCM4P10_QPModuloTable and
 *                         armVCM4P10_QPDivTable
 *   [sp,#12]    AC        zero selects the DC-only path
 *
 * Output : r0 = 0.
 *
 * Frame  : PUSH {r4-r12,lr} (40 bytes) + VPUSH {d8-d9} (16 bytes) +
 *          0x20 bytes of scratch = 0x58 bytes, so after the prologue
 *            [sp,#0x00..0x1f]  4x4 array of 16-bit coefficients (scratch)
 *            [sp,#0x58]        predStep
 *            [sp,#0x5c]        dstStep
 *            [sp,#0x60]        QP
 *            [sp,#0x64]        AC
 *          0x58 is a multiple of 8, so the stack alignment seen on entry is
 *          unchanged at the BL below.
 *
 * Saved  : r4-r12, lr and d8-d9 are preserved; r0-r3, d0-d7 and the flags
 *          are not.
 */

        .eabi_attribute 24, 1           @ build attribute tag 24 (ABI alignment needed)
        .eabi_attribute 25, 1           @ build attribute tag 25 (ABI alignment preserved)

        .arm
        .fpu    neon
        .text

        .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
        .type   omxVCM4P10_DequantTransformResidualFromPairAndAdd, %function
        .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
omxVCM4P10_DequantTransformResidualFromPairAndAdd:
        PUSH    {r4-r12,lr}
        VPUSH   {d8-d9}
        SUB     sp,sp,#0x20             @ reserve the 32-byte coefficient scratch
        ADD     r4,sp,#0                @ r4 = scratch buffer
        LDR     r5,[sp,#0x64]           @ r5 = AC
        MOV     r7,r1                   @ keep pPred, pDC, pDst in registers
        MOV     r8,r2                   @ that survive the call below
        MOV     r9,r3
        CMP     r5,#0
        BEQ     .Ldc_only               @ AC == 0: only the DC term contributes

        /* ---- AC path: unpack, dequantise, 2-D inverse transform ---- */
        MOV     r1,r4
        BL      armVCM4P10_UnpackBlock4x4       @ (r0 = ppSrc, r1 = scratch)
        LDR     r1,[sp,#0x60]           @ r1 = QP

        /* Position-independent table addresses: each literal holds
           (table - (label + 8)); adding pc at the label yields the table. */
        LDR     r11, .LarmVCM4P10_QPModuloTable
.Lpic_qp_modulo:
        ADD     r11, pc
        LDR     r10, .LarmVCM4P10_QPDivTable
.Lpic_qp_div:
        ADD     r10, pc
        LDR     r2, .LarmVCM4P10_VMatrixU16
.Lpic_vmatrix:
        ADD     r2, pc

        LDRSB   r12,[r11,r1]            @ r12 = QPModuloTable[QP], used as a byte offset
        LDRSB   lr,[r10,r1]             @ lr  = QPDivTable[QP], used as a shift count
        LDR     r10, =0x3020504         @ VTBL byte indices: 16-bit entries 2,1
        LDR     r1, =0x5040100          @ VTBL byte indices: 16-bit entries 0,2
        ADD     r2,r2,r12               @ r2 -> scale entries selected by QP
        VDUP.32 d7,r1
        VDUP.32 d9,r10
        VDUP.16 d5,lr
        VLD1.8  {d6},[r2]               @ d6 = 8 bytes of armVCM4P10_VMatrixU16
        VTBL.8  d8,{d6},d7              @ d8 = entries {0,2,0,2}
        VTBL.8  d4,{d6},d9              @ d4 = entries {2,1,2,1}
        CMP     r8,#0                   @ flags stay live until VMOVNE below
        VLD1.16 {d0,d1,d2,d3},[r4]      @ d0..d3 = the four unpacked rows
        VSHL.U16 d8,d8,d5               @ apply the per-QP shift to both scale rows
        VSHL.U16 d4,d4,d5
        BEQ     .Lno_dc                 @ pDC == NULL: do not read through it
        LDRSH   r10,[r8,#0]             @ r10 = *pDC
.Lno_dc:
        VMUL.I16 d0,d0,d8               @ rows 0 and 2 use d8, rows 1 and 3 use d4
        VMUL.I16 d1,d1,d4
        VMUL.I16 d2,d2,d8
        VMUL.I16 d3,d3,d4
        VMOVNE.16 d0[0],r10             @ pDC != NULL: coefficient [0][0] = *pDC, unscaled

        /* First 1-D pass: 4x4 transpose, then butterflies across d0..d3. */
        VTRN.16 d0,d1
        VTRN.16 d2,d3
        VTRN.32 q0,q1
        VMOV.I16 d4,#0                  @ zero operand for the halving adds
        VADD.I16 d5,d0,d2               @ e0 = x0 + x2
        VSUB.I16 d6,d0,d2               @ e1 = x0 - x2
        VHADD.S16 d7,d1,d4              @ (x1 + 0) >> 1
        VHADD.S16 d8,d3,d4              @ (x3 + 0) >> 1
        VSUB.I16 d7,d7,d3               @ e2 = (x1 >> 1) - x3
        VADD.I16 d8,d1,d8               @ e3 = x1 + (x3 >> 1)
        VADD.I16 d0,d5,d8               @ y0 = e0 + e3
        VADD.I16 d1,d6,d7               @ y1 = e1 + e2
        VSUB.I16 d2,d6,d7               @ y2 = e1 - e2
        VSUB.I16 d3,d5,d8               @ y3 = e0 - e3

        /* Second 1-D pass: same butterflies after another transpose. */
        VTRN.16 d0,d1
        VTRN.16 d2,d3
        VTRN.32 q0,q1
        VADD.I16 d5,d0,d2
        VSUB.I16 d6,d0,d2
        VHADD.S16 d7,d1,d4
        VHADD.S16 d8,d3,d4
        VSUB.I16 d7,d7,d3
        VADD.I16 d8,d1,d8
        VADD.I16 d0,d5,d8
        VADD.I16 d1,d6,d7
        VSUB.I16 d2,d6,d7
        VSUB.I16 d3,d5,d8
        VRSHR.S16 d0,d0,#6              @ rounding shift: (v + 32) >> 6
        VRSHR.S16 d1,d1,#6
        VRSHR.S16 d2,d2,#6
        VRSHR.S16 d3,d3,#6
        B       .Ladd_pred

        /* ---- DC-only path: every residual equals (*pDC + 32) >> 6 ---- */
.Ldc_only:
        LDRSH   r10,[r8,#0]             @ pDC is dereferenced without a NULL check here
        ADD     r10,r10,#0x20
        ASR     r10,r10,#6
        VDUP.16 d0,r10
        VDUP.16 d1,r10
        VDUP.16 d2,r10
        VDUP.16 d3,r10

        /* ---- Add prediction, saturate to 8 bits, store four rows ---- */
.Ladd_pred:
        LDR     r1,[sp,#0x58]           @ r1  = predStep
        LDR     r10,[sp,#0x5c]          @ r10 = dstStep
        LDR     r3,[r7],r1              @ prediction rows 0 and 1 (one word each)
        LDR     r5,[r7],r1
        VMOV    d4,r3,r5
        LDR     r3,[r7],r1              @ prediction rows 2 and 3
        LDR     r5,[r7,#0]
        VMOV    d5,r3,r5
        VADDW.U8 q3,q0,d4               @ residual + zero-extended prediction
        VADDW.U8 q4,q1,d5               @ q4 is d8/d9, hence the VPUSH above
        VQMOVUN.S16 d0,q3               @ saturate signed 16-bit to unsigned 8-bit
        VQMOVUN.S16 d1,q4
        VST1.32 {d0[0]},[r9],r10
        VST1.32 {d0[1]},[r9],r10
        VST1.32 {d1[0]},[r9],r10
        VST1.32 {d1[1]},[r9]
        MOV     r0,#0                   @ return value 0
        ADD     sp,sp,#0x20
        VPOP    {d8-d9}
        POP     {r4-r12,pc}
        .size   omxVCM4P10_DequantTransformResidualFromPairAndAdd, . - omxVCM4P10_DequantTransformResidualFromPairAndAdd
        .endfunc

/* PC-relative offsets consumed by the LDR / ADD pc pairs above.  The +8
   accounts for pc reading two instructions ahead in ARM state. */
.LarmVCM4P10_QPModuloTable:
        .word   armVCM4P10_QPModuloTable-(.Lpic_qp_modulo+8)
.LarmVCM4P10_QPDivTable:
        .word   armVCM4P10_QPDivTable-(.Lpic_qp_div+8)
.LarmVCM4P10_VMatrixU16:
        .word   armVCM4P10_VMatrixU16-(.Lpic_vmatrix+8)

        .end