armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe

        M_VARIANTS CortexA8

    IF CortexA8

;//---------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
;//
;// Applies the 6-tap filter (1, -5, 20, 20, -5, 1) first vertically and
;// then horizontally to produce four rows of four 8-bit results.
;//
;// In:
;//   r0 (pSrc)    address of the first of nine source rows (A..I);
;//                16 bytes are loaded from each row, of which the
;//                first nine columns contribute to the result
;//   r1 (srcStep) byte distance between consecutive source rows
;//   r2 (pDst), r3 (dstStep) are given aliases below but are never
;//                referenced by this routine
;//
;// Out (no store is performed here; results are left in registers):
;//   dAcc0..dAcc3 (D0, D2, D4, D6): output rows 0..3 as U8. Only the
;//                low four lanes of each are meaningful; the upper
;//                lanes are narrowed from left-over accumulator bits.
;//   r0           advanced by 5 * srcStep
;//
;// Clobbers: r12, D0-D31.
;// NOTE(review): D8-D15 are overwritten and are not saved in the code
;// visible here; confirm that M_START/M_END or the callers preserve
;// them if that is required.
;// NOTE(review): the exact register-save behaviour requested by the
;// "r11" argument of M_START is defined in armCOMM_s.h, not here.
;//
;// The instruction order is hand-interleaved (loads between multiply-
;// accumulates, horizontal VEXTs started before the vertical pass
;// finishes); keep it when editing.
;//---------------------------------------------------------------------
        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare Neon registers

;// Filter coefficients. The U8 and S16 views share D30/D31: the U8
;// constants serve the vertical pass, and each register is re-loaded
;// as S16 only after its last U8 use.
dTCoeff5        DN 30.U8
dTCoeff20       DN 31.U8
dCoeff5         DN 30.S16
dCoeff20        DN 31.S16

;// Source rows A..I, one Q register (16 bytes) per row
qSrcA01         QN 0.U8
qSrcB23         QN 1.U8
qSrcC45         QN 2.U8
qSrcD67         QN 3.U8
qSrcE89         QN 4.U8
qSrcF1011       QN 5.U8
qSrcG1213       QN 6.U8
qSrcH1415       QN 7.U8
qSrcI1617       QN 8.U8

;// Low halves of the source rows (columns 0..7)
dSrcA0          DN 0.U8
dSrcB2          DN 2.U8
dSrcC4          DN 4.U8
dSrcD6          DN 6.U8
dSrcE8          DN 8.U8
dSrcF10         DN 10.U8
dSrcG12         DN 12.U8
dSrcH14         DN 14.U8
dSrcI16         DN 16.U8

;// High halves of the source rows (columns 8..15)
dSrcA1          DN 1.U8
dSrcB3          DN 3.U8
dSrcC5          DN 5.U8
dSrcD7          DN 7.U8
dSrcE9          DN 9.U8
dSrcF11         DN 11.U8
dSrcG13         DN 13.U8
dSrcH15         DN 15.U8
dSrcI17         DN 17.U8

;// Vertical filter results for output rows P, Q, R, S: columns 0..7
qTempP01        QN 9.S16
qTempQ01        QN 10.S16
qTempR01        QN 11.S16
qTempS01        QN 12.S16

;// Vertical filter results, columns 8..15. These reuse Q0..Q3, i.e.
;// they overwrite source rows A..D once those rows are no longer read.
qTempP23        QN 0.S16
qTempQ23        QN 1.S16
qTempR23        QN 2.S16
qTempS23        QN 3.S16

;// D-register views of the vertical results:
;// x0 = columns 0..3, x1 = columns 4..7, x2 = columns 8..11
dTempP0         DN 18.S16
dTempP1         DN 19.S16
dTempP2         DN 0.S16

dTempQ0         DN 20.S16
dTempQ1         DN 21.S16
dTempQ2         DN 2.S16

dTempR0         DN 22.S16
dTempR1         DN 23.S16
dTempR2         DN 4.S16

dTempS0         DN 24.S16
dTempS1         DN 25.S16
dTempS2         DN 6.S16

;// Shifted windows used by the horizontal pass
dTempB0         DN 26.S16
dTempC0         DN 27.S16
dTempD0         DN 28.S16
dTempF0         DN 29.S16

;// Results after the rounding shift (U16) ...
dTempAcc0       DN 0.U16
dTempAcc1       DN 2.U16
dTempAcc2       DN 4.U16
dTempAcc3       DN 6.U16

;// ... and after the final narrowing to U8
dAcc0           DN 0.U8
dAcc1           DN 2.U8
dAcc2           DN 4.U8
dAcc3           DN 6.U8

;// 32-bit horizontal accumulators; they overwrite Q0..Q3 once the
;// corresponding dTempX2 value has been consumed.
qAcc0           QN 0.S32
qAcc1           QN 1.S32
qAcc2           QN 2.S32
qAcc3           QN 3.S32

;// U16 Q-register views used as the source of the final VQMOVN
qTAcc0          QN 0.U16
qTAcc1          QN 1.U16
qTAcc2          QN 2.U16
qTAcc3          QN 3.U16

;// Scratch register; aliases dSrcE8 and is first written only after
;// the last read of row E.
dTmp            DN 8.S16

        ;// ------------------------------------------------------------
        ;// Load rows A..E through pSrc and rows F..I through r12,
        ;// interleaved with the start of the vertical filter.
        ;// ------------------------------------------------------------
        VLD1        qSrcA01, [pSrc], srcStep        ;// [a0 a1 a2 a3 .. a15]
        ADD         r12, pSrc, srcStep, LSL #2      ;// r12 -> row F (pSrc now at row B)
        VMOV        dTCoeff5, #5
        VMOV        dTCoeff20, #20
        VLD1        qSrcF1011, [r12], srcStep
        VLD1        qSrcB23, [pSrc], srcStep        ;// [b0 b1 b2 b3 .. b15]

        VLD1        qSrcG1213, [r12], srcStep
        VADDL       qTempP01, dSrcA0, dSrcF10
        VLD1        qSrcC45, [pSrc], srcStep        ;// [c0 c1 c2 c3 .. c15]
        VADDL       qTempP23, dSrcA1, dSrcF11       ;// overwrites row A (Q0)
        VLD1        qSrcD67, [pSrc], srcStep
        VADDL       qTempQ01, dSrcB2, dSrcG12
        VLD1        qSrcE89, [pSrc], srcStep

        ;// t0: P = A + F + 20*(C + D) - 5*(B + E)
        VMLAL       qTempP01, dSrcC4, dTCoeff20

        VLD1        qSrcH1415, [r12], srcStep

        VMLAL       qTempP23, dSrcC5, dTCoeff20

        VLD1        qSrcI1617, [r12], srcStep       ;// [i0 i1 i2 i3 .. ]

        VMLAL       qTempP01, dSrcD6, dTCoeff20
        VMLAL       qTempQ01, dSrcD6, dTCoeff20
        VMLSL       qTempP23, dSrcB3, dTCoeff5

        VADDL       qTempR01, dSrcC4, dSrcH14

        VMLSL       qTempP01, dSrcB2, dTCoeff5

        VADDL       qTempQ23, dSrcB3, dSrcG13       ;// overwrites row B (Q1)

        VMLAL       qTempP23, dSrcD7, dTCoeff20
        VMLAL       qTempQ01, dSrcE8, dTCoeff20

        VMLSL       qTempP01, dSrcE8, dTCoeff5
        VMLAL       qTempQ23, dSrcD7, dTCoeff20

        VMLSL       qTempP23, dSrcE9, dTCoeff5

        ;// t1: Q = B + G + 20*(D + E) - 5*(C + F)

        VMLAL       qTempR01, dSrcE8, dTCoeff20
        VMLSL       qTempQ01, dSrcC4, dTCoeff5
        VMLSL       qTempQ23, dSrcC5, dTCoeff5
        VADDL       qTempR23, dSrcC5, dSrcH15       ;// overwrites row C (Q2)

        VMLAL       qTempR01, dSrcF10, dTCoeff20
        VMLSL       qTempQ01, dSrcF10, dTCoeff5
        VMLAL       qTempQ23, dSrcE9, dTCoeff20
        VMLAL       qTempR23, dSrcE9, dTCoeff20
        VADDL       qTempS01, dSrcD6, dSrcI16


        VMLSL       qTempR01, dSrcD6, dTCoeff5
        VMLSL       qTempQ23, dSrcF11, dTCoeff5
        VMLSL       qTempR23, dSrcD7, dTCoeff5

        ;// t2: R = C + H + 20*(E + F) - 5*(D + G)
        VADDL       qTempS23, dSrcD7, dSrcI17       ;// overwrites row D (Q3)
        VMLAL       qTempS01, dSrcF10, dTCoeff20
        VMLSL       qTempR01, dSrcG12, dTCoeff5
        VMLSL       qTempR23, dSrcG13, dTCoeff5

        VMLAL       qTempS23, dSrcF11, dTCoeff20
        VMLAL       qTempS01, dSrcG12, dTCoeff20
        VEXT        dTempB0, dTempP0, dTempP1, #1   ;// row P columns 1..4
        VMLAL       qTempR23, dSrcF11, dTCoeff20


        ;// t3: S = D + I + 20*(F + G) - 5*(E + H)
        VMLAL       qTempS23, dSrcG13, dTCoeff20
        VMLSL       qTempS01, dSrcE8, dTCoeff5
        VEXT        dTempC0, dTempP0, dTempP1, #2   ;// row P columns 2..5
        VMOV        dCoeff20, #20                   ;// D31 becomes S16; U8 view is dead from here
        VMLSL       qTempS23, dSrcE9, dTCoeff5
        VMLSL       qTempS01, dSrcH14, dTCoeff5
        VEXT        dTempF0, dTempP1, dTempP2, #1   ;// row P columns 5..8
        VEXT        dTempD0, dTempP0, dTempP1, #3   ;// row P columns 3..6
        VMLSL       qTempS23, dSrcH15, dTCoeff5

        VADDL       qAcc0, dTempP0, dTempF0         ;// overwrites dTempP2 (already consumed)
        VADD        dTempC0, dTempC0, dTempD0
        ;// h: horizontal pass, per row
        ;//    acc = x[0] + x[5] + 20*(x[2] + x[3]) - 5*(x[1] + x[4])
        VMOV        dCoeff5, #5                     ;// D30 becomes S16; U8 view is dead from here

        ;// res0
        VADD        dTempB0, dTempB0, dTempP1
        VMLAL       qAcc0, dTempC0, dCoeff20
        VEXT        dTempC0, dTempQ0, dTempQ1, #2
        VEXT        dTempD0, dTempQ0, dTempQ1, #3
        VEXT        dTempF0, dTempQ1, dTempQ2, #1
        VMLSL       qAcc0, dTempB0, dCoeff5

        ;// res1
        VEXT        dTempB0, dTempQ0, dTempQ1, #1
        VADDL       qAcc1, dTempQ0, dTempF0
        VADD        dTempC0, dTempC0, dTempD0
        VADD        dTempB0, dTempB0, dTempQ1
        VEXT        dTempD0, dTempR0, dTempR1, #3
        VMLAL       qAcc1, dTempC0, dCoeff20
        VEXT        dTempF0, dTempR1, dTempR2, #1
        VEXT        dTempC0, dTempR0, dTempR1, #2
        ;// dTmp is used for row R because dTempB0 still holds the row Q
        ;// value that the VMLSL into qAcc1 below has yet to read.
        VEXT        dTmp, dTempR0, dTempR1, #1
        VADDL       qAcc2, dTempR0, dTempF0
        VMLSL       qAcc1, dTempB0, dCoeff5
        VADD        dTempC0, dTempC0, dTempD0

        ;// res2
        VADD        dTempB0, dTmp, dTempR1
        VEXT        dTempD0, dTempS0, dTempS1, #3
        VMLAL       qAcc2, dTempC0, dCoeff20

        ;// res3
        VEXT        dTempC0, dTempS0, dTempS1, #2
        VEXT        dTempF0, dTempS1, dTempS2, #1
        VADD        dTempC0, dTempC0, dTempD0
        VEXT        dTmp, dTempS0, dTempS1, #1
        VADDL       qAcc3, dTempS0, dTempF0
        VMLSL       qAcc2, dTempB0, dCoeff5
        VMLAL       qAcc3, dTempC0, dCoeff20
        VADD        dTmp, dTmp, dTempS1
        VMLSL       qAcc3, dTmp, dCoeff5

        ;// Rounding right shift by 10 with unsigned saturation: S32 -> U16
        VQRSHRUN    dTempAcc0, qAcc0, #10
        VQRSHRUN    dTempAcc1, qAcc1, #10
        VQRSHRUN    dTempAcc2, qAcc2, #10
        VQRSHRUN    dTempAcc3, qAcc3, #10

        ;// Saturating narrow U16 -> U8; the low four lanes carry the result
        VQMOVN      dAcc0, qTAcc0
        VQMOVN      dAcc1, qTAcc1
        VQMOVN      dAcc2, qTAcc2
        VQMOVN      dAcc3, qTAcc3

        M_END

    ENDIF





        END
