;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;// ---------------------------------------------------------------------
;// Register aliases shared by both routines
;// ---------------------------------------------------------------------

;// Arguments
pSrc            RN 0            ;// source pointer, any byte alignment
srcStep         RN 1            ;// byte distance between source rows
pDst            RN 8            ;// destination buffer, advanced as rows are stored
iHeight         RN 9            ;// row counter, decremented once per row

;// Row-loop working registers
x               RN 7            ;// low two bits of pSrc; shares r7 with x0
x0              RN 7            ;// source word 0 / assembled output word
x1              RN 10           ;// source word 1
x2              RN 11           ;// source word 2 (horizontal routine only)
Scratch         RN 12           ;// holds the entry value of pDst (horizontal routine)


;// ---------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Copies iHeight rows from pSrc, which may have any byte alignment, into
;// the 4 byte aligned buffer at pDst, in preparation for horizontal
;// interpolation. Each row is read as three words from the word-aligned
;// address at or below pSrc, shifted down by (pSrc & 3) bytes, and stored
;// as three words, so the 9 bytes needed per row start at the beginning
;// of each stored row.
;//
;// The shift directions treat the lowest-addressed byte as the least
;// significant byte of a word (little-endian layout assumed).
;//
;// The row count is tested after each copy, so one row is always copied
;// before the count is examined.
;//
;// Input registers
;//   r0 (pSrc)    - source pointer
;//   r1 (srcStep) - source row step in bytes
;//   r8 (pDst)    - aligned destination pointer
;//   r9 (iHeight) - number of rows to copy
;//
;// Registers preserved for the top level function
;//   r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;//   r7,r8,r9,r10,r11,r12
;//
;// Output registers
;//   r0 - the entry value of pDst, i.e. the aligned copy, to be used as pSrc
;//   r1 - 12, the row step of the aligned copy
;// ---------------------------------------------------------------------

        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Remember where the aligned copy starts; pDst is advanced by the stores
        MOV     Scratch, pDst

HorAlignDispatch
        AND     x, pSrc, #3                     ;// x = byte offset within the word
        BIC     pSrc, pSrc, #3                  ;// round pSrc down to a word boundary

        ;// Select the row loop matching the byte offset
        M_SWITCH x
        M_CASE  HorCopyOffset0
        M_CASE  HorCopyOffset1
        M_CASE  HorCopyOffset2
        M_CASE  HorCopyOffset3
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, plain three-word copy
HorCopyOffset0
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}             ;// write one aligned row
        BGT     HorCopyOffset0
        B       HorCopyDone

        ;// Offset 1: drop one byte, pull one byte in from the following word
HorCopyOffset1
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8
        STM     pDst!, {x0, x1, x2}             ;// write one aligned row
        BGT     HorCopyOffset1
        B       HorCopyDone

        ;// Offset 2: drop two bytes, pull two bytes in from the following word
HorCopyOffset2
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16
        STM     pDst!, {x0, x1, x2}             ;// write one aligned row
        BGT     HorCopyOffset2
        B       HorCopyDone

        ;// Offset 3: drop three bytes, pull three bytes in from the following word
HorCopyOffset3
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24
        STM     pDst!, {x0, x1, x2}             ;// write one aligned row
        BGT     HorCopyOffset3
        ;// falls through to HorCopyDone

HorCopyDone

        ;// Hand the aligned copy back in the source argument registers
        MOV     pSrc, Scratch                   ;// start of the aligned copy
        MOV     srcStep, #12                    ;// three words per stored row

        M_END


;// ---------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Copies iHeight rows from pSrc, which may have any byte alignment, into
;// the aligned buffer at pDst, in preparation for vertical interpolation.
;// Each row contributes the 4 bytes starting at pSrc, assembled into a
;// single word; rows are stored back to back, 4 bytes apart.
;//
;// The shift directions treat the lowest-addressed byte as the least
;// significant byte of a word (little-endian layout assumed).
;//
;// The row count is tested after each copy, so one row is always copied
;// before the count is examined.
;//
;// Input registers
;//   r0 (pSrc)    - source pointer
;//   r1 (srcStep) - source row step in bytes
;//   r8 (pDst)    - aligned destination pointer
;//   r9 (iHeight) - number of rows to copy
;//
;// Registers preserved for the top level function
;//   r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;//   r7,r8,r9,r10,r11,r12
;//
;// Output registers
;//   r0 - final pDst minus 28, to be used as pSrc for the aligned copy
;//   r1 - 4, the row step of the aligned copy
;// ---------------------------------------------------------------------

        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

VerAlignDispatch
        AND     x, pSrc, #3                     ;// x = byte offset within the word
        BIC     pSrc, pSrc, #3                  ;// round pSrc down to a word boundary

        ;// Select the row loop matching the byte offset
        M_SWITCH x
        M_CASE  VerCopyOffset0
        M_CASE  VerCopyOffset1
        M_CASE  VerCopyOffset2
        M_CASE  VerCopyOffset3
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, one word per row
VerCopyOffset0
        M_LDR   x0, [pSrc], srcStep             ;// load, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4                  ;// write one aligned row
        BGT     VerCopyOffset0
        B       VerCopyDone

        ;// Offset 1: upper three bytes of word 0 plus the first byte of word 1
VerCopyOffset1
        LDR     x1, [pSrc, #4]                  ;// read word 1 before pSrc is stepped
        M_LDR   x0, [pSrc], srcStep             ;// load word 0, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4                  ;// write one aligned row
        BGT     VerCopyOffset1
        B       VerCopyDone

        ;// Offset 2: upper two bytes of word 0 plus the first two bytes of word 1
VerCopyOffset2
        LDR     x1, [pSrc, #4]                  ;// read word 1 before pSrc is stepped
        M_LDR   x0, [pSrc], srcStep             ;// load word 0, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4                  ;// write one aligned row
        BGT     VerCopyOffset2
        B       VerCopyDone

        ;// Offset 3: top byte of word 0 plus the first three bytes of word 1
VerCopyOffset3
        LDR     x1, [pSrc, #4]                  ;// read word 1 before pSrc is stepped
        M_LDR   x0, [pSrc], srcStep             ;// load word 0, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4                  ;// write one aligned row
        BGT     VerCopyOffset3
        ;// falls through to VerCopyDone

VerCopyDone

        ;// pDst has advanced 4 bytes for every row stored.
        ;// NOTE(review): stepping back a fixed 28 bytes lands 8 bytes (two rows)
        ;// past the start of the copy only when 9 rows were copied - confirm
        ;// that every caller passes iHeight = 9.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4                     ;// one word per stored row

        M_END


    ENDIF

        END
