;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

    IF ARM1136JS

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma
;//
;// Bilinear interpolation of a block of 8-bit samples with 1/8-sample
;// fractional offsets (dx, dy). For every output sample:
;//
;//   out = (A*p(x,y) + B*p(x+1,y) + C*p(x,y+1) + D*p(x+1,y+1) + 32) >> 6
;//
;//   A = (8-dx)*(8-dy)   B = dx*(8-dy)   C = (8-dx)*dy   D = dx*dy
;//
;// When dx + dy == 0 the block is copied without filtering.
;//
;// Inputs:
;//   r0       pSrc      pointer to the top-left source sample
;//   r1       iSrcStep  distance in bytes between source rows
;//   r2       pDst      pointer to the top-left destination sample
;//   r3       iDstStep  distance in bytes between destination rows
;//   [stack]  Width, Height, Dx, Dy (4 bytes each, in that order)
;//
;// Returns:
;//   r0 = OMX_Sts_NoErr
;//
;// Notes:
;//   - Output is produced in 2x2 blocks (two samples on each of two rows) and
;//     written with halfword stores. The loop-counter and row-advance
;//     arithmetic below only works out for even Width and Height >= 2;
;//     nothing here checks that. NOTE(review): callers are presumably
;//     expected to pass even block sizes and a halfword-aligned pDst/iDstStep
;//     - confirm against the caller.
;//   - The packed-coefficient trick assumes 0 <= dx, dy <= 7 so that each
;//     coefficient fits in 16 bits and each result fits in 8 bits; this is
;//     not validated here.
;//   - The fractional path reads a 3x3 source window per 2x2 output block,
;//     i.e. one column and one row beyond the output block.
;//-----------------------------------------------------------------------------------------------

;// Register aliases. Many aliases deliberately share one physical register;
;// each name is only meaningful in the part of the function where it is live.

;// input registers

pSrc                    RN 0
iSrcStep                RN 1
pDst                    RN 2
iDstStep                RN 3
iWidth                  RN 4
iHeight                 RN 5
dx                      RN 6
dy                      RN 7


;// local variable registers
temp                    RN 11
r0x20                   RN 12
tmp0x20                 RN 14
return                  RN 0
EightMinusdx            RN 8
EightMinusdy            RN 9
dxEightMinusdx          RN 8
BACoeff                 RN 6            ;// replaces dx once the coefficients are packed
DCCoeff                 RN 7            ;// replaces dy once the coefficients are packed

iDstStepx2MinusWidth    RN 8
iSrcStepx2MinusWidth    RN 9
iSrcStep1               RN 10

pSrc1                   RN 1            ;// replaces iSrcStep (second source row pointer)
pSrc2                   RN 8
pDst1                   RN 8
pDst2                   RN 12

pix00                   RN 8
pix01                   RN 9
pix10                   RN 10
pix11                   RN 11

Out0100                 RN 8
Out1110                 RN 10

x00                     RN 8
x01                     RN 10
x02                     RN 12
x10                     RN 9
x11                     RN 11
x12                     RN 14
x20                     RN 10
x21                     RN 12
x22                     RN 14

x01x00                  RN 8
x02x01                  RN 10
x11x10                  RN 9
x12x11                  RN 11
x21x20                  RN 10
x22x21                  RN 12

OutRow00                RN 12
OutRow01                RN 14
OutRow10                RN 10
OutRow11                RN 12

OutRow0100              RN 12
OutRow1110              RN 12

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        ;// NOTE(review): M_START is defined in armCOMM_s.h; the "r11" argument
        ;// presumably requests saving r4-r11 (and lr, which is used as scratch
        ;// below via r14) - confirm against the macro definition.
        M_START armVCM4P10_Interpolate_Chroma, r11

        ;// Define stack arguments
        M_ARG   Width,  4
        M_ARG   Height, 4
        M_ARG   Dx,     4
        M_ARG   Dy,     4

        ;// Load argument from the stack
        ;// M_STALL ARM1136JS=4

        M_LDR   iWidth,  Width
        M_LDR   iHeight, Height
        M_LDR   dx,      Dx
        M_LDR   dy,      Dy

        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy

        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy

        ADD     pSrc1, pSrc, iSrcStep               ;// pSrc1 = second source row; overwrites iSrcStep
        SUB     temp, iWidth, #1
        RSB     EightMinusdx, dx, #8
        RSB     EightMinusdy, dy, #8
        CMN     dx, dy                              ;// Z set when dx + dy == 0; tested by BEQ below
        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16   ;// low half = 8-dx, high half = dx
        ORR     iWidth, iWidth, temp, LSL #16       ;// high half = width-1 (counter), low half = width

        ;// Packed Coeffs.
        ;// One multiply scales both halfwords at once:
        ;//   BACoeff = ACoeff | (BCoeff << 16)
        ;//   DCCoeff = CCoeff | (DCoeff << 16)
        ;// BACoeff/DCCoeff overwrite dx/dy; DCCoeff reads dy as its multiplier
        ;// before it is replaced.

        MUL     BACoeff, dxEightMinusdx, EightMinusdy
        MUL     DCCoeff, dxEightMinusdx, dy


        ;// Checking either of dx and dy being non-zero
        ;// (uses the flags from CMN above; nothing in between updates them)

        BEQ     MVIsZero

;// Pixel layout:
;//
;//   x00 x01 x02
;//   x10 x11 x12
;//   x20 x21 x22

;// If fractional mv is not (0, 0)

OuterLoopMVIsNotZero

InnerLoopMVIsNotZero

        ;// Load the top two rows of the 3x3 window; the last load of each row
        ;// uses writeback so pSrc and pSrc1 advance by 2 columns.
        LDRB    x00, [pSrc,  #+0]
        LDRB    x10, [pSrc1, #+0]
        LDRB    x01, [pSrc,  #+1]
        LDRB    x11, [pSrc1, #+1]
        LDRB    x02, [pSrc,  #+2]!
        LDRB    x12, [pSrc1, #+2]!

        ;// Pack horizontally adjacent samples into halfword pairs for SMLAD
        ORR     x01x00, x00, x01, LSL #16
        ;// M_STALL ARM1136JS=1
        ORR     x02x01, x01, x02, LSL #16
        MOV     r0x20, #32                          ;// rounding term for the >> 6 below
        ORR     x11x10, x10, x11, LSL #16
        ORR     x12x11, x11, x12, LSL #16

        ;// Upper-row contribution: A*left + B*right + 32
        SMLAD   x01x00, x01x00, BACoeff, r0x20
        SMLAD   x02x01, x02x01, BACoeff, r0x20

        ;// iWidth packed with MSB (top 16 bits)
        ;// as inner loop counter value i.e
        ;// (iWidth -1) and LSB (lower 16 bits)
        ;// as original width
        ;// Subtracting 1<<17 lowers the counter by 2 (two columns per pass).
        ;// The flags set here are consumed by the BGT at the end of the loop;
        ;// no instruction in between updates N/Z/C/V.

        SUBS    iWidth, iWidth, #1<<17

        ;// Add the lower-row contribution: C*left + D*right
        SMLAD   OutRow00, x11x10, DCCoeff, x01x00
        SMLAD   OutRow01, x12x11, DCCoeff, x02x01

        ;// pSrc2 = 2*pSrc1 - pSrc = pSrc1 + (row distance): third source row,
        ;// already advanced by 2 columns, hence the negative offsets below.
        RSB     pSrc2, pSrc, pSrc1, LSL #1

        MOV     OutRow00, OutRow00, LSR #6
        MOV     OutRow01, OutRow01, LSR #6

        LDRB    x20, [pSrc2, #-2]

        ;// Two output samples of the first row, stored as one halfword
        ORR     OutRow0100, OutRow00, OutRow01, LSL #8
        STRH    OutRow0100, [pDst], #2

        LDRB    x21, [pSrc2, #-1]
        LDRB    x22, [pSrc2, #+0]

        ;// pDst was post-incremented above, so the second-row store uses #-2
        ADD     pDst1, pDst, iDstStep

        ;// M_STALL ARM1136JS=1

        ORR     x21x20, x20, x21, LSL #16
        ORR     x22x21, x21, x22, LSL #16

        MOV     tmp0x20, #32                        ;// rounding term (r0x20's register was reused)

        ;// Reusing the packed data x11x10 and x12x11
        ;// (middle row is the upper row of the second output row)

        SMLAD   x11x10, x11x10, BACoeff, tmp0x20
        SMLAD   x12x11, x12x11, BACoeff, tmp0x20
        SMLAD   OutRow10, x21x20, DCCoeff, x11x10
        SMLAD   OutRow11, x22x21, DCCoeff, x12x11

        MOV     OutRow10, OutRow10, LSR #6
        MOV     OutRow11, OutRow11, LSR #6

        ;// M_STALL ARM1136JS=1

        ORR     OutRow1110, OutRow10, OutRow11, LSL #8

        STRH    OutRow1110, [pDst1, #-2]

        BGT     InnerLoopMVIsNotZero

        ;// End of a row pair. For an even width the counter half of iWidth is
        ;// now 0xFFFF, so adding 1<<16 wraps it to 0 and leaves iWidth = width.
        ;// The pointers have advanced by `width` bytes, so adding
        ;// (2*step - width) moves each one down two rows. The source step is
        ;// recovered as pSrc1 - pSrc because pSrc1 replaced iSrcStep.
        ;// The flags from SUBS iHeight are consumed by the final BGT.
        SUBS    iHeight, iHeight, #2
        ADD     iWidth, iWidth, #1<<16
        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
        SUB     iSrcStep1, pSrc1, pSrc
        SUB     temp, iWidth, #1
        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
        ADD     pDst, pDst, iDstStepx2MinusWidth
        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
        ADD     pSrc, pSrc, iSrcStepx2MinusWidth
        ORR     iWidth, iWidth, temp, LSL #16       ;// re-arm the inner loop counter
        BGT     OuterLoopMVIsNotZero
        MOV     return, #OMX_Sts_NoErr
        ;// NOTE(review): M_EXIT presumably emits the function return sequence
        ;// for this early exit - see armCOMM_s.h.
        M_EXIT

;// If fractional mv is (0, 0): plain 2x2 block copy, no filtering

MVIsZero
        ;// M_STALL ARM1136JS=4
OuterLoopMVIsZero

InnerLoopMVIsZero

        LDRB    pix00, [pSrc],  #+1
        LDRB    pix01, [pSrc],  #+1
        LDRB    pix10, [pSrc1], #+1
        LDRB    pix11, [pSrc1], #+1

        ADD     pDst2, pDst, iDstStep               ;// second destination row
        SUBS    iWidth, iWidth, #1<<17              ;// counter -= 2; flags used by BGT below

        ORR     Out0100, pix00, pix01, LSL #8
        ORR     Out1110, pix10, pix11, LSL #8

        STRH    Out0100, [pDst],  #2
        STRH    Out1110, [pDst2], #2

        BGT     InnerLoopMVIsZero

        ;// Same row-pair advance as in the fractional path above
        SUBS    iHeight, iHeight, #2
        ADD     iWidth, iWidth, #1<<16
        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
        SUB     iSrcStep1, pSrc1, pSrc
        SUB     temp, iWidth, #1
        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
        ADD     pDst, pDst, iDstStepx2MinusWidth
        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
        ADD     pSrc, pSrc, iSrcStepx2MinusWidth
        ORR     iWidth, iWidth, temp, LSL #16
        BGT     OuterLoopMVIsZero
        MOV     return, #OMX_Sts_NoErr
        M_END

    ENDIF ;// ARM1136JS


        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------
