;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe

DEBUG_ON    SETL {FALSE}


    IF ARM1136JS

;// Function:
;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
;//
;// Implements horizontal interpolation for a block of size 4x4. Input and output should
;// be aligned.
;//
;// For every output sample the 6-tap filter
;//     sum = a - 5*b + 20*c + 20*d - 5*e + f
;// is evaluated over six consecutive source bytes and the stored value is
;//     clip((sum + 16) >> 5, 0, 255)
;// pSrc addresses tap 'a' of the first output of row 0, so output x of a row is
;// centred between source bytes x+2 and x+3 of that row. Nine source bytes (a..i)
;// are read per row.
;//
;// Two rows are processed per loop iteration, packed as two 16-bit lanes per
;// register (row 0 in the low halfword, row 1 in the high halfword). The loop runs
;// twice, giving four rows in total.
;//
;// Arithmetic notes:
;//     * 255 is added to one operand of every pairwise sum so that no 16-bit lane
;//       ever goes negative. The per-lane bias of a finished accumulator is therefore
;//       255 * (1 + 5*(4 - 1)) = 16*255.
;//     * UQSUB16 with 0x0fe0 = 16*255 - 16 removes the bias, adds the rounding
;//       constant 16 and saturates negative results to 0.
;//     * USAT16 #13 limits each lane to 0x1fff so that (lane >> 5) fits in 8 bits.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
;//
;// Registers preserved for top level function
;// r0,r1,r2,r3,r4,r5,r6,r14
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12
;//
;// Output registers
;// None. Function will preserve r0-r3
;//
;// NOTE(review): r4-r6 and r14 are written by the body; their preservation is
;// presumably provided by "M_START ..., r6" / M_END from armCOMM_s.h - confirm
;// against that header.


;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare inner loop registers
;// Several names below alias the same physical register (for example r1 is
;// srcStep, ValI and Temp2; r3 is dstStep, Temp1 and r0x0fe00fe0; r5 is Acc1, ValB
;// and Temp5; r7 is Acc3 and ValD; r12 is ValG and Temp3). The instruction order
;// in the loop relies on each value being dead before its register is reused, so
;// instructions must not be reordered without re-checking these lifetimes.
Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

ValA            RN 4
ValB            RN 5
ValC            RN 6
ValD            RN 7
ValE            RN 8
ValF            RN 9
ValG            RN 12
ValH            RN 14
ValI            RN 1

Temp1           RN 3
Temp2           RN 1
Temp3           RN 12
Temp4           RN 7                                    ;// Not referenced by the code below
Temp5           RN 5
r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
Counter         RN 11

Height          RN 3                                    ;// Not referenced by the code below

        ;// Local storage for the two step sizes: r1 and r3 are reused as scratch
        ;// inside the loop and are reloaded from these slots.
        M_ALLOC4 pDstStep, 4
        M_ALLOC4 pSrcStep, 4

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6

        MOV     Counter, #2                             ;// Two iterations of two rows each
        M_STR   dstStep, pDstStep
        M_STR   srcStep, pSrcStep
        LDR     r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results

NextTwoRowsLoop
        ;// Load nine source bytes (a..i) from each of two consecutive rows.
        ;// The row 1 accesses use srcStep before r1 is overwritten by Temp2.
        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
        LDR     ValH, [pSrc, srcStep]                   ;// Load [h1 g1 f1 e1]
        LDR     ValE, [pSrc], #4                        ;// Load [h0 g0 f0 e0]
        LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte i1 (overwrites srcStep in r1)
        LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte i0, then rewind pSrc to the row start

        ;// Interleave the two rows so that each register holds the same column
        ;// pair for row 0 (low halfword) and row 1 (high halfword).
        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]

        ;// Calculate Acc0
        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// c + d (+255)
        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// e + b (+255)
        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c + d) - (b + e)
        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// a + f (+255); ValA is dead from here (same register)
        ADD     Temp1, Temp1, Temp1, LSL #2             ;// 5 * (4*(c + d) - (b + e))
        ADD     Acc0, Acc0, Temp1

        ;// Calculate Acc1
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        UXTAB16 Temp1, ValE, ValD, ROR #8               ;// e + d (+255)
        UXTAB16 Temp3, ValC, ValF, ROR #8               ;// c + f (+255)
        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(d + e) - (c + f)
        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
        ADD     Temp1, Temp1, Temp1, LSL #2             ;// 5 * (4*(d + e) - (c + f))
        UXTAB16 Acc1, ValG, ValB, ROR #8                ;// g + b (+255); ValB is dead from here (same register)
        ADD     Acc1, Acc1, Temp1

        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// c + h (+255); ValC is dead from here (same register)
        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
        UQSUB16 Acc0, Acc0, r0x0fe00fe0                 ;// Remove bias, add rounding 16, floor at 0
        UQSUB16 Acc1, Acc1, r0x0fe00fe0
        USAT16  Acc0, #13, Acc0                         ;// Cap at 0x1fff so that >>5 fits in 8 bits
        USAT16  Acc1, #13, Acc1

        ;// Calculate Acc2
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// g + d (+255); overwrites r0x0fe00fe0 (same register)
        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// i + d (+255); ValD is dead from here (same register)
        UXTAB16 Temp2, ValE, ValF, ROR #8               ;// e + f (+255); ValI is dead from here (same register)
        AND     Acc1, r0x00ff00ff, Acc1, LSR #5         ;// [00 out1_row1 00 out1_row0]
        AND     Acc0, r0x00ff00ff, Acc0, LSR #5         ;// [00 out0_row1 00 out0_row0]
        ORR     Acc0, Acc0, Acc1, LSL #8                ;// [out1_row1 out0_row1 out1_row0 out0_row0]
        RSB     Temp5, Temp1, Temp2, LSL #2             ;// 4*(e + f) - (d + g); Acc1 already merged into Acc0
        UXTAB16 Temp2, ValG, ValF, ROR #8               ;// g + f (+255), used by Acc3 below
        ADD     Temp5, Temp5, Temp5, LSL #2             ;// 5 * (4*(e + f) - (d + g))
        ADD     Acc2, Acc2, Temp5

        ;// Calculate Acc3
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        UXTAB16 Temp5, ValE, ValH, ROR #8               ;// e + h (+255)
        RSB     Temp5, Temp5, Temp2, LSL #2             ;// 4*(f + g) - (e + h)
        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// Reload: r3 was used as Temp1 above
        ADD     Temp5, Temp5, Temp5, LSL #2             ;// 5 * (4*(f + g) - (e + h))
        ADD     Acc3, Acc3, Temp5

        UQSUB16 Acc3, Acc3, r0x0fe00fe0
        UQSUB16 Acc2, Acc2, r0x0fe00fe0
        USAT16  Acc3, #13, Acc3
        USAT16  Acc2, #13, Acc2

        M_LDR   dstStep, pDstStep                       ;// Restore dstStep (r3 was used as scratch)
        AND     Acc3, r0x00ff00ff, Acc3, LSR #5         ;// [00 out3_row1 00 out3_row0]
        AND     Acc2, r0x00ff00ff, Acc2, LSR #5         ;// [00 out2_row1 00 out2_row0]
        ORR     Acc2, Acc2, Acc3, LSL #8                ;// [out3_row1 out2_row1 out3_row0 out2_row0]

        SUBS    Counter, Counter, #1                    ;// Flags are consumed by BGT below; nothing in between sets them
        M_LDR   srcStep, pSrcStep                       ;// Restore srcStep (r1 was used as scratch)
        PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// Row 0: [out3 out2 out1 out0]
        M_STR   Acc1, [pDst], dstStep                   ;// Store result1
        PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// Row 1: [out3 out2 out1 out0]
        M_STR   Acc2, [pDst], dstStep                   ;// Store result2
        ADD     pSrc, pSrc, srcStep, LSL #1             ;// Advance source by two rows

        BGT     NextTwoRowsLoop
End
        ;// Both pointers advanced by four rows in total; rewind them so that
        ;// r0-r3 leave the function with their entry values.
        SUB     pDst, pDst, dstStep, LSL #2
        SUB     pSrc, pSrc, srcStep, LSL #2

        M_END

    ENDIF

    END