;//
;//
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


        IF CortexA8

;// Branch tables used by the "LDR pc, [pTable, iWidth, LSL #1]" dispatch in
;// armVCM4P10_Interpolate_Chroma. Each entry is one word and the byte offset
;// is iWidth*2, so:
;//     iWidth = 2 -> offset  4 -> entry 1 (WidthIs2...)
;//     iWidth = 4 -> offset  8 -> entry 2 (WidthIs4...)
;//     iWidth = 8 -> offset 16 -> entry 4 (WidthIs8...)
;// Entries 0 and 3 are not selected by widths 2, 4 or 8.

        M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero

        DCD     WidthIs2MVIsNotZero, WidthIs2MVIsNotZero
        DCD     WidthIs4MVIsNotZero, WidthIs4MVIsNotZero
        DCD     WidthIs8MVIsNotZero

        M_TABLE armVCM4P10_WidthBranchTableMVIsZero

        DCD     WidthIs2MVIsZero, WidthIs2MVIsZero
        DCD     WidthIs4MVIsZero, WidthIs4MVIsZero
        DCD     WidthIs8MVIsZero


;// input registers

pSrc            RN 0
iSrcStep        RN 1
pDst            RN 2
iDstStep        RN 3
iWidth          RN 4
iHeight         RN 5
dx              RN 6
dy              RN 7

;// local variable registers
;// Several aliases below deliberately share a physical register:
;//     return       / pSrc   -> r0
;//     EightMinusdx / CCoeff -> r8
;//     EightMinusdy / BCoeff -> r9
;//     DCoeff       / dx     -> r6
;// The function body orders its writes so that a register is overwritten
;// only after the last read of the value it previously held.
pc              RN 15
return          RN 0
EightMinusdx    RN 8
EightMinusdy    RN 9

ACoeff          RN 12
BCoeff          RN 9
CCoeff          RN 8
DCoeff          RN 6

pTable          RN 11

Step1           RN 10
SrcStepMinus1   RN 14

;// Filter coefficients, one copy per U8 lane
dACoeff         DN D12.U8
dBCoeff         DN D13.U8
dCCoeff         DN D14.U8
dDCoeff         DN D15.U8

;// Source rows: "a" is loaded at the row start, "b" one byte further on
dRow0a          DN D0.U8
dRow0b          DN D1.U8
dRow1a          DN D2.U8
dRow1b          DN D3.U8

;// 16-bit accumulators (qRowNx and qOutRowN name the same Q registers)
qRow0a          QN Q2.S16
qRow0b          QN Q3.S16

;//dIndex         DN D16.U8
qRow1a          QN Q11.S16
qRow1b          QN Q12.S16

dRow2a          DN D16.U8
dRow2b          DN D17.U8
dRow3a          DN D18.U8
dRow3b          DN D19.U8

qOutRow2        QN Q11.U16
qOutRow3        QN Q12.U16
dOutRow2        DN D20.U8
dOutRow3        DN D21.U8
dOutRow2U64     DN D20.U64
dOutRow3U64     DN D21.U64

qOutRow0        QN Q2.U16
qOutRow1        QN Q3.U16
dOutRow0        DN D8.U8
dOutRow1        DN D9.U8

dOutRow0U64     DN D8.U64
dOutRow1U64     DN D9.U64

dOutRow0U32     DN D8.U32
dOutRow1U32     DN D9.U32

dOutRow0U16     DN D8.U16
dOutRow1U16     DN D9.U16


;// Views of D0..D3 (the dRow0a/dRow0b/dRow1a/dRow1b registers) used by the
;// copy-only paths to store source rows unmodified
dOut0U64        DN D0.U64
dOut1U64        DN D1.U64

dOut00U32       DN D0.U32
dOut01U32       DN D1.U32
dOut10U32       DN D2.U32
dOut11U32       DN D3.U32

dOut0U16        DN D0.U16
dOut1U16        DN D1.U16

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm starts
;//-----------------------------------------------------------------------------------------------
;//
;// Register / stack interface as used by this code:
;//     r0          pSrc      source pointer
;//     r1          iSrcStep  byte step between source rows
;//     r2          pDst      destination pointer
;//     r3          iDstStep  byte step between destination rows
;//     stack args  Width, Height, Dx, Dy (4 bytes each, in that order)
;// Result:
;//     r0 = OMX_Sts_NoErr on every exit path.
;//
;// For a non-zero (dx, dy) every output pixel is
;//     (A*x00 + B*x01 + C*x10 + D*x11 + 32) >> 6
;// with A = (8-dx)*(8-dy), B = dx*(8-dy), C = (8-dx)*dy, D = dx*dy.
;// The "+32, >>6" and the narrowing to 8 bits come from VQRSHRN #6.
;// When dx + dy == 0 the source rows are copied to the destination.
;//
;// Every source row is read with two 8-byte loads, at byte offsets 0 and 1,
;// whatever the value of iWidth.
;//
;// NOTE(review): the coefficients are duplicated into U8 lanes, which
;// presumes dx and dy are in 0..7 (largest coefficient 64) - confirm
;// against the callers.
;// NOTE(review): r14 is used as scratch (SrcStepMinus1), so this relies on
;// M_START/M_EXIT/M_END saving and restoring lr - confirm against the macro
;// definitions (presumably in armCOMM_s.h).

        ;// Write function header
        M_START armVCM4P10_Interpolate_Chroma, r11, d15

        ;// Define stack arguments
        M_ARG   Width,      4
        M_ARG   Height,     4
        M_ARG   Dx,         4
        M_ARG   Dy,         4

        ;// Load argument from the stack
        ;// M_STALL ARM1136JS=4

        M_LDRD  dx, dy, Dx
        M_LDRD  iWidth, iHeight, Width

        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy

        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy

        RSB     EightMinusdx, dx, #8
        RSB     EightMinusdy, dy, #8
        CMN     dx,dy                                   ;// Z set when dx + dy == 0
        MOV     Step1, #1                               ;// MOV/SUB below leave the CMN flags intact
        LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
        SUB     SrcStepMinus1, iSrcStep, Step1
        LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero

        ;// Load the first row at offsets 0 and 1. The two post-increments
        ;// (1, then iSrcStep-1) leave pSrc at the start of the next row.
        VLD1    dRow0a, [pSrc], Step1                   ;// 0a

        ;// Order matters: BCoeff, CCoeff and DCoeff overwrite EightMinusdy,
        ;// EightMinusdx and dx respectively (shared registers).
        SMULBB  ACoeff, EightMinusdx, EightMinusdy
        SMULBB  BCoeff, dx, EightMinusdy
        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
        SMULBB  CCoeff, EightMinusdx, dy
        SMULBB  DCoeff, dx, dy

        VDUP    dACoeff, ACoeff
        VDUP    dBCoeff, BCoeff
        VDUP    dCCoeff, CCoeff
        VDUP    dDCoeff, DCoeff

        LDR     pc, [pTable, iWidth, LSL #1]            ;// Branch to the case based on iWidth

;// Pixel layout:
;//
;//   x00 x01 x02
;//   x10 x11 x12
;//   x20 x21 x22

;// If fractional mv is not (0, 0)
;//
;// Width 8: four output rows per pass. On entry dRow0a/dRow0b hold the top
;// row of the pass. Four further rows are loaded; the last one lands in
;// dRow0a/dRow0b and so becomes the top row of the next pass.
;//     qRow0a = A*row0a + B*row0b + C*row1a + D*row1b   -> output row 0
;//     qRow0b = A*row1a + B*row1b + C*row2a + D*row2b   -> output row 1
;//     qRow1a = A*row2a + B*row2b + C*row3a + D*row3b   -> output row 2
;//     qRow1b = A*row3a + B*row3b + C*row4a + D*row4b   -> output row 3
WidthIs8MVIsNotZero

        VLD1        dRow1a, [pSrc], Step1                   ;// 1a
        VMULL       qRow0a, dRow0a, dACoeff
        VLD1        dRow1b, [pSrc], SrcStepMinus1           ;// 1b
        VMULL       qRow0b, dRow1a, dACoeff
        VLD1        dRow2a, [pSrc], Step1                   ;// 2a
        VMLAL       qRow0a, dRow0b, dBCoeff
        VLD1        dRow2b, [pSrc], SrcStepMinus1           ;// 2b
        VMULL       qRow1a, dRow2a, dACoeff
        VMLAL       qRow0b, dRow1b, dBCoeff
        VLD1        dRow3a, [pSrc], Step1                   ;// 3a
        VMLAL       qRow0a, dRow1a, dCCoeff
        VMLAL       qRow1a, dRow2b, dBCoeff
        VMULL       qRow1b, dRow3a, dACoeff
        VLD1        dRow3b, [pSrc], SrcStepMinus1           ;// 3b
        VMLAL       qRow0b, dRow2a, dCCoeff
        VLD1        dRow0a, [pSrc], Step1                   ;// 0a
        VMLAL       qRow1b, dRow3b, dBCoeff
        VMLAL       qRow1a, dRow3a, dCCoeff
        VMLAL       qRow0a, dRow1b, dDCoeff
        VLD1        dRow0b, [pSrc], SrcStepMinus1           ;// 0b
        VMLAL       qRow1b, dRow0a, dCCoeff
        VMLAL       qRow0b, dRow2b, dDCoeff
        VMLAL       qRow1a, dRow3b, dDCoeff


        SUBS        iHeight, iHeight, #4                    ;// flags consumed by the BGT below
        VMLAL       qRow1b, dRow0b, dDCoeff

        ;// Round, shift right by 6 and narrow each accumulator to 8 bits
        VQRSHRN     dOutRow0, qOutRow0, #6
        VQRSHRN     dOutRow1, qOutRow1, #6
        VQRSHRN     dOutRow2, qOutRow2, #6
        VST1        dOutRow0U64, [pDst], iDstStep
        VQRSHRN     dOutRow3, qOutRow3, #6

        VST1        dOutRow1U64, [pDst], iDstStep
        VST1        dOutRow2U64, [pDst], iDstStep
        VST1        dOutRow3U64, [pDst], iDstStep


        BGT         WidthIs8MVIsNotZero
        MOV         return,  #OMX_Sts_NoErr
        M_EXIT

;// Width 4: two output rows per pass. All eight lanes are computed but only
;// the low 32 bits (four pixels) of each result are stored.
WidthIs4MVIsNotZero

        VLD1        dRow1a, [pSrc], Step1
        VMULL       qRow0a, dRow0a, dACoeff
        VMULL       qRow0b, dRow1a, dACoeff
        VLD1        dRow1b, [pSrc], SrcStepMinus1
        VMLAL       qRow0a, dRow0b, dBCoeff
        VMLAL       qRow0b, dRow1b, dBCoeff
        VLD1        dRow0a, [pSrc], Step1                   ;// top row of the next pass
        VMLAL       qRow0a, dRow1a, dCCoeff
        VMLAL       qRow0b, dRow0a, dCCoeff
        VLD1        dRow0b, [pSrc], SrcStepMinus1
        SUBS        iHeight, iHeight, #2
        VMLAL       qRow0b, dRow0b, dDCoeff
        VMLAL       qRow0a, dRow1b, dDCoeff

        VQRSHRN     dOutRow1, qOutRow1, #6
        VQRSHRN     dOutRow0, qOutRow0, #6

        VST1        dOutRow0U32[0], [pDst], iDstStep
        VST1        dOutRow1U32[0], [pDst], iDstStep

        BGT         WidthIs4MVIsNotZero
        MOV         return,  #OMX_Sts_NoErr
        M_EXIT

;// Width 2: same arithmetic as the width-4 path, but only the low 16 bits
;// (two pixels) of each result are stored.
WidthIs2MVIsNotZero

        VLD1        dRow1a, [pSrc], Step1
        VMULL       qRow0a, dRow0a, dACoeff
        VMULL       qRow0b, dRow1a, dACoeff
        VLD1        dRow1b, [pSrc], SrcStepMinus1
        VMLAL       qRow0a, dRow0b, dBCoeff
        VMLAL       qRow0b, dRow1b, dBCoeff
        VLD1        dRow0a, [pSrc], Step1                   ;// top row of the next pass
        VMLAL       qRow0a, dRow1a, dCCoeff
        VMLAL       qRow0b, dRow0a, dCCoeff
        VLD1        dRow0b, [pSrc], SrcStepMinus1
        SUBS        iHeight, iHeight, #2
        VMLAL       qRow0b, dRow0b, dDCoeff
        VMLAL       qRow0a, dRow1b, dDCoeff

        VQRSHRN     dOutRow1, qOutRow1, #6
        VQRSHRN     dOutRow0, qOutRow0, #6

        VST1        dOutRow0U16[0], [pDst], iDstStep
        VST1        dOutRow1U16[0], [pDst], iDstStep

        BGT         WidthIs2MVIsNotZero
        MOV         return,  #OMX_Sts_NoErr
        M_EXIT

;// If fractional mv is (0, 0)
;//
;// Width 8: the common prologue already advanced pSrc by one full row
;// (1 + (iSrcStep-1)); step back so the copy loop starts at row 0 again.
WidthIs8MVIsZero
        SUB         pSrc, pSrc, iSrcStep

WidthIs8LoopMVIsZero
        VLD1        dRow0a, [pSrc], iSrcStep
        SUBS        iHeight, iHeight, #2
        VLD1        dRow0b, [pSrc], iSrcStep
        VST1        dOut0U64, [pDst], iDstStep
        VST1        dOut1U64, [pDst], iDstStep
        BGT         WidthIs8LoopMVIsZero

        MOV         return,  #OMX_Sts_NoErr
        M_EXIT

;// Width 4: D0 (dRow0a) already holds row 0 from the prologue and pSrc
;// points at row 1. Each pass stores D0 then D1 (low 32 bits each) and
;// reloads D0 with the row for the next pass; on the final pass that
;// reloaded value is not used.
WidthIs4MVIsZero
        VLD1        dRow0b, [pSrc], iSrcStep

        SUBS        iHeight, iHeight, #2

        VST1        dOut00U32[0], [pDst], iDstStep
        VLD1        dRow0a, [pSrc], iSrcStep
        VST1        dOut01U32[0], [pDst], iDstStep

        BGT         WidthIs4MVIsZero
        MOV         return,  #OMX_Sts_NoErr
        M_EXIT

;// Width 2: same scheme as the width-4 copy, storing the low 16 bits of
;// each row.
WidthIs2MVIsZero
        VLD1        dRow0b, [pSrc], iSrcStep
        SUBS        iHeight, iHeight, #2

        VST1        dOut0U16[0], [pDst], iDstStep
        VLD1        dRow0a, [pSrc], iSrcStep
        VST1        dOut1U16[0], [pDst], iDstStep

        BGT         WidthIs2MVIsZero
        MOV         return,  #OMX_Sts_NoErr
        M_END

        ENDIF ;// CortexA8

        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma_asm ends
;//-----------------------------------------------------------------------------------------------
