;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

DEBUG_ON    SETL {FALSE}


    IF ARM1136JS

;// Function:
;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
;//
;// Implements horizontal interpolation for a block of size 4x4. Input and output should
;// be aligned.
;//
;// For every output pixel the code evaluates the 6-tap filter
;//     out = clip_0_255((a - 5*b + 20*c + 20*d - 5*e + f + 16) >> 5)
;// on six consecutive source bytes a..f of the same row. Each row therefore reads
;// nine source bytes (a..i) starting at pSrc: two word loads plus one byte load.
;// Two rows are processed per loop iteration (row 0 in the low halfword lanes,
;// row 1 in the high halfword lanes) and the loop runs twice, giving four rows.
;//
;// NOTE(review): source rows are read with LDR and results are written as whole
;// words, so pSrc/pDst and both steps are presumably expected to be word aligned;
;// confirm against the callers.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r0,r2  input pointer and r1,r3 corresponding step size
;//
;// Registers preserved for top level function
;// r0,r1,r2,r3,r4,r5,r6,r14
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// None. Function will preserve r0-r3
;//
;// r1 and r3 are reused as scratch inside the loop; their original values are
;// kept in two stack slots and reloaded at the end of every iteration. r0 and r2
;// are advanced by four rows in total and rewound after the loop.


;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare inner loop registers.
;// Several aliases below intentionally name the same physical register; the
;// instruction order in the loop relies on the earlier value being dead before
;// the register is written under its other name.
Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

ValA            RN 4
ValB            RN 5
ValC            RN 6
ValD            RN 7
ValE            RN 8
ValF            RN 9
ValG            RN 12
ValH            RN 14
ValI            RN 1

Temp1           RN 3
Temp2           RN 1
Temp3           RN 12
Temp5           RN 5
r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
Counter         RN 11

        ;// Stack slots that hold the caller's step values while r1/r3 are
        ;// used as scratch (M_ALLOC4 is defined in armCOMM_s.h).
        M_ALLOC4 pDstStep, 4
        M_ALLOC4 pSrcStep, 4

        ;// Function header
        ;// NOTE(review): per the register list above, M_START with r6 is expected
        ;// to save r4-r6 and lr; confirm in armCOMM_s.h.
        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6

        MOV     Counter, #2                             ;// Two iterations, two rows each
        M_STR   dstStep, pDstStep
        M_STR   srcStep, pSrcStep
        LDR     r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results

NextTwoRowsLoop
        ;// Load nine bytes of row 0 and of row 1. The post-indexed loads walk
        ;// pSrc forward by 8 and the last one rewinds it to the row start.
        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
        LDR     ValH, [pSrc, srcStep]                   ;// Load row 1 [h1 g1 f1 e1]
        LDR     ValE, [pSrc], #4                        ;// Load row 0 [h0 g0 f0 e0]
        LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte i1 (zero extended)
        LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte i0 (zero extended), rewind pSrc

        ;// Interleave the two rows so that each register carries the same
        ;// pixel columns of both rows: row 0 in bytes 0-1, row 1 in bytes 2-3.
        ;// Even-position pixels (a, c, e, g, i) are widened to 16-bit lanes with
        ;// the 255 offset already added; odd-position pixels (b, d, f, h) are
        ;// picked out of the packed registers later with "ROR #8".
        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]

        ;// Calculate Acc0
        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;// Computed per lane as (a+f+255) + 5*(4*(c+d+255) - (b+e+255)),
        ;// i.e. the filter sum plus 16*255.
        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// c + d + 255
        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// e + b + 255
        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c+d) - (b+e) + 3*255
        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// a + f + 255 (ValA is dead from here)
        ADD     Temp1, Temp1, Temp1, LSL #2             ;// times 5
        ADD     Acc0, Acc0, Temp1

        ;// Calculate Acc1
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        UXTAB16 Temp1, ValE, ValD, ROR #8               ;// e + d + 255
        UXTAB16 Temp3, ValC, ValF, ROR #8               ;// c + f + 255
        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(d+e) - (c+f) + 3*255
        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255] (reuses Temp3's register)
        ADD     Temp1, Temp1, Temp1, LSL #2             ;// times 5
        UXTAB16 Acc1, ValG, ValB, ROR #8                ;// g + b + 255 (ValB is dead from here)
        ADD     Acc1, Acc1, Temp1

        ;// Remove the accumulated offset but keep the +16 rounding term, clamp,
        ;// and start Acc2 while the saturating operations complete.
        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// c + h + 255 (ValC is dead from here)
        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
        UQSUB16 Acc0, Acc0, r0x0fe00fe0                 ;// filter + 16, negative results clamp to 0
        UQSUB16 Acc1, Acc1, r0x0fe00fe0
        USAT16  Acc0, #13, Acc0                         ;// Clamp to 13 bits so that >>5 yields at most 255
        USAT16  Acc1, #13, Acc1

        ;// Calculate Acc2
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        ;// Temp1 shares its register with r0x0fe00fe0, so the constant is
        ;// destroyed here and reloaded before it is needed again.
        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// g + d + 255
        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// i + d + 255 (ValD and ValI are dead from here)
        UXTAB16 Temp2, ValE, ValF, ROR #8               ;// e + f + 255
        AND     Acc1, r0x00ff00ff, Acc1, LSR #5         ;// [00 out1(row1) 00 out1(row0)]
        AND     Acc0, r0x00ff00ff, Acc0, LSR #5         ;// [00 out0(row1) 00 out0(row0)]
        ORR     Acc0, Acc0, Acc1, LSL #8                ;// Outputs 0 and 1 of both rows packed in Acc0
        RSB     Temp5, Temp1, Temp2, LSL #2             ;// 4*(e+f) - (d+g) + 3*255 (reuses Acc1's register)
        UXTAB16 Temp2, ValG, ValF, ROR #8               ;// g + f + 255, used for Acc3 below
        ADD     Temp5, Temp5, Temp5, LSL #2             ;// times 5
        ADD     Acc2, Acc2, Temp5

        ;// Calculate Acc3
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        UXTAB16 Temp5, ValE, ValH, ROR #8               ;// e + h + 255
        RSB     Temp5, Temp5, Temp2, LSL #2             ;// 4*(f+g) - (e+h) + 3*255
        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// Reload: the register was used as Temp1 above
        ADD     Temp5, Temp5, Temp5, LSL #2             ;// times 5
        ADD     Acc3, Acc3, Temp5

        UQSUB16 Acc3, Acc3, r0x0fe00fe0                 ;// filter + 16, negative results clamp to 0
        UQSUB16 Acc2, Acc2, r0x0fe00fe0
        USAT16  Acc3, #13, Acc3
        USAT16  Acc2, #13, Acc2

        M_LDR   dstStep, pDstStep                       ;// r3 is no longer needed as scratch: restore dstStep
        AND     Acc3, r0x00ff00ff, Acc3, LSR #5         ;// [00 out3(row1) 00 out3(row0)]
        AND     Acc2, r0x00ff00ff, Acc2, LSR #5         ;// [00 out2(row1) 00 out2(row0)]
        ORR     Acc2, Acc2, Acc3, LSL #8                ;// Outputs 2 and 3 of both rows packed in Acc2

        ;// The flags set by SUBS are consumed by BGT below; none of the
        ;// instructions in between are flag-setting forms.
        SUBS    Counter, Counter, #1
        M_LDR   srcStep, pSrcStep                       ;// r1 is no longer needed as scratch: restore srcStep
        PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// Row 0: low halves of Acc0 and Acc2
        M_STR   Acc1, [pDst], dstStep                   ;// Store result1
        PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// Row 1: high halves of Acc0 and Acc2
        M_STR   Acc2, [pDst], dstStep                   ;// Store result2
        ADD     pSrc, pSrc, srcStep, LSL #1             ;// Advance source by two rows

        BGT     NextTwoRowsLoop
End
        ;// Rewind both pointers by the four rows consumed so r0-r3 leave the
        ;// function with their entry values.
        SUB     pDst, pDst, dstStep, LSL #2
        SUB     pSrc, pSrc, srcStep, LSL #2

        M_END

    ENDIF

    END