;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

DEBUG_ON    SETL {FALSE}

    IF ARM1136JS

;// Declare input registers
pSrc            RN 0            ;// source pointer, any byte alignment
srcStep         RN 1            ;// source row pitch in bytes
pDst            RN 8            ;// word-aligned destination buffer
iHeight         RN 9            ;// number of rows to copy

;// Declare inner loop registers
x               RN 7            ;// pSrc & 3 (byte offset); shares r7 with x0
x0              RN 7            ;// first word of the current row
x1              RN 10           ;// second word of the current row
x2              RN 11           ;// third word of the current row
Scratch         RN 12           ;// copy of pDst taken on entry (HorAlign9x only)

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Copies rows from an arbitrarily aligned source (pSrc) into a 4-byte aligned
;// destination (pDst) ready for horizontal interpolation.
;// Horizontal interpolation needs 9 bytes per row; each row is handled as
;// three whole words: 12 bytes are loaded from pSrc rounded down to a word
;// boundary, shifted right by (pSrc & 3) bytes, and stored as 12 bytes.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number
;// of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12 (and the condition flags, via SUBS)
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (the value r8 held on entry)
;// r1 - step size to this aligned location (12 bytes = three words per row)
;//
;// Notes
;// - The byte offset is computed once, before the loop, and pSrc is advanced
;//   by srcStep from its rounded-down value. Every row must therefore share
;//   the first row's alignment, i.e. srcStep is expected to be a multiple of 4.
;// - The row counter is tested after each row has been stored, so at least
;//   one row is always copied.
;// - The shift directions treat the least significant byte of each loaded
;//   word as the lowest-addressed byte (NOTE(review): little-endian data
;//   access assumed - confirm for the target build).

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Copy pDst to scratch; pDst itself is advanced by the stores below
        MOV     Scratch, pDst

StartAlignedStackCopy
        AND     x, pSrc, #3                     ;// x = byte offset within the word
        BIC     pSrc, pSrc, #3                  ;// round pSrc down to a word boundary

        ;// Select the copy loop that matches the byte offset
        M_SWITCH x
        M_CASE   Copy0toAligned
        M_CASE   Copy1toAligned
        M_CASE   Copy2toAligned
        M_CASE   Copy3toAligned
        M_ENDSWITCH

        ;// Offset 0: the source is already word aligned, copy three words per row
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy0toAligned
        B       CopyEnd

        ;// Offset 1: shift the 12 loaded bytes right by one byte
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #8                  ;// drop 1 leading byte of word 0
        ORR     x0, x0, x1, LSL #24             ;// refill from the bottom of word 1
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24             ;// refill from the bottom of word 2
        MOV     x2, x2, LSR #8                  ;// top byte of the last word is zero
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: shift the 12 loaded bytes right by two bytes
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #16                 ;// drop 2 leading bytes of word 0
        ORR     x0, x0, x1, LSL #16             ;// refill from the bottom of word 1
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16             ;// refill from the bottom of word 2
        MOV     x2, x2, LSR #16                 ;// top two bytes of the last word are zero
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: shift the 12 loaded bytes right by three bytes
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below
        ADD     pSrc, pSrc, srcStep             ;// next source row

        ;// One cycle stall

        MOV     x0, x0, LSR #24                 ;// drop 3 leading bytes of word 0
        ORR     x0, x0, x1, LSL #8              ;// refill from the bottom of word 1
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8              ;// refill from the bottom of word 2
        MOV     x2, x2, LSR #24                 ;// only one byte of the last word remains
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy3toAligned
        ;// The last case falls through into CopyEnd

CopyEnd

        MOV     pSrc, Scratch                   ;// r0 = start of the aligned copy
        MOV     srcStep, #12                    ;// r1 = pitch of the aligned copy

        M_END


;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned
;// destination pointed by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in horizontal direction.
;// Each row is reduced to one word: the word at pSrc rounded down to a word
;// boundary (and, unless pSrc is already aligned, the word after it) is
;// loaded, the two are merged so that the byte at the original pSrc lands in
;// the least significant byte, and the result is stored at pDst.
;//
;// Registers used as input for this function
;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number
;// of rows to copy
;//
;// Registers preserved for top level function
;// r2,r3,r4,r5,r6
;//
;// Registers modified by the function
;// r7,r8,r9,r10,r11,r12 (and the condition flags, via SUBS)
;// NOTE(review): the instructions visible here write only r7, r8, r9 and r10
;// (plus r0/r1); r11 and r12 stay in the list because whether the M_* macros
;// touch them cannot be seen from this file.
;//
;// Output registers
;// r0 - pointer to the new aligned location which will be used as pSrc
;//      (computed as the final value of pDst minus 28 bytes)
;// r1 - step size to this aligned location (4 bytes = one word per row)
;//
;// Notes
;// - The byte offset is computed once, before the loop, and pSrc is advanced
;//   by srcStep from its rounded-down value. Every row must therefore share
;//   the first row's alignment, i.e. srcStep is expected to be a multiple of 4.
;// - The row counter is tested after each row has been stored, so at least
;//   one row is always copied.
;// - The shift directions treat the least significant byte of each loaded
;//   word as the lowest-addressed byte (NOTE(review): little-endian data
;//   access assumed - confirm for the target build).

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        ;// Copy the rows at pSrc into the aligned buffer at pDst
StartVAlignedStackCopy
        AND     x, pSrc, #3                     ;// x = byte offset within the word
        BIC     pSrc, pSrc, #3                  ;// round pSrc down to a word boundary


        ;// Select the copy loop that matches the byte offset
        M_SWITCH x
        M_CASE   Copy0toVAligned
        M_CASE   Copy1toVAligned
        M_CASE   Copy2toVAligned
        M_CASE   Copy3toVAligned
        M_ENDSWITCH

        ;// Offset 0: the source is already word aligned, copy one word per row
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep             ;// load the row, then step to the next
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy0toVAligned
        B       CopyVEnd

        ;// Offset 1: three bytes from the first word, one from the second
Copy1toVAligned
        LDR     x1, [pSrc, #4]                  ;// second word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// first word, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #24                 ;// low byte of word 1 -> top byte
        ORR     x0, x1, x0, LSR #8              ;// upper 3 bytes of word 0 -> low bytes
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: two bytes from each word
Copy2toVAligned
        LDR     x1, [pSrc, #4]                  ;// second word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// first word, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #16                 ;// low 2 bytes of word 1 -> top half
        ORR     x0, x1, x0, LSR #16             ;// upper 2 bytes of word 0 -> low half
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: one byte from the first word, three from the second
Copy3toVAligned
        LDR     x1, [pSrc, #4]                  ;// second word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// first word, then step to the next row
        SUBS    iHeight, iHeight, #1            ;// flags consumed by the BGT below

        ;// One cycle stall

        MOV     x1, x1, LSL #8                  ;// low 3 bytes of word 1 -> upper bytes
        ORR     x0, x1, x0, LSR #24             ;// top byte of word 0 -> low byte
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy3toVAligned
        ;// The last case falls through into CopyVEnd

CopyVEnd

        ;// pDst has advanced 4 bytes per stored row; step back a fixed 28 bytes.
        ;// NOTE(review): the constant does not depend on iHeight. With 9 rows
        ;// copied this yields the address of the third stored row (9*4 - 28 = 8);
        ;// presumably callers always pass 9 - confirm against the call sites.
        SUB     pSrc, pDst, #28
        MOV     srcStep, #4                     ;// r1 = pitch of the aligned copy

        M_END


    ENDIF

        END
