;// armVCM4P10_Average_4x_Align_unsafe_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//

;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
;// First operand will be at offset ALIGNMENT from aligned address
;// Second operand will be at aligned location and will be used as output
;// destination pointed by (pDst) for vertical interpolation.
35;// This function needs to copy 4 bytes in horizontal direction 36;// 37;// Registers used as input for this function 38;// r0,r1,r2,r3 where r2 containings aligned memory pointer and r3 step size 39;// 40;// Registers preserved for top level function 41;// r4,r5,r6,r8,r9,r14 42;// 43;// Registers modified by the function 44;// r7,r10,r11,r12 45;// 46;// Output registers 47;// r2 - pointer to the aligned location 48;// r3 - step size to this aligned location 49 50 INCLUDE omxtypes_s.h 51 INCLUDE armCOMM_s.h 52 53 M_VARIANTS ARM1136JS 54 55 EXPORT armVCM4P10_Average_4x4_Align0_unsafe 56 EXPORT armVCM4P10_Average_4x4_Align2_unsafe 57 EXPORT armVCM4P10_Average_4x4_Align3_unsafe 58 59DEBUG_ON SETL {FALSE} 60 61;// Declare input registers 62pPred0 RN 0 63iPredStep0 RN 1 64pPred1 RN 2 65iPredStep1 RN 3 66pDstPred RN 2 67iDstStep RN 3 68 69;// Declare other intermediate registers 70iPredA0 RN 10 71iPredA1 RN 11 72iPredB0 RN 12 73iPredB1 RN 14 74Temp1 RN 4 75Temp2 RN 5 76ResultA RN 5 77ResultB RN 4 78r0x80808080 RN 7 79 80 IF ARM1136JS 81 82 ;// This function calculates average of 4x4 block 83 ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4 84 85 ;// Function header 86 M_START armVCM4P10_Average_4x4_Align0_unsafe, r6 87 88 ;// Code start 89 LDR r0x80808080, =0x80808080 90 91 ;// 1st load 92 M_LDR iPredB0, [pPred1] 93 M_LDR iPredA0, [pPred0], iPredStep0 94 M_LDR iPredB1, [pPred1, iPredStep1] 95 M_LDR iPredA1, [pPred0], iPredStep0 96 97 ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 98 MVN iPredB0, iPredB0 99 MVN iPredB1, iPredB1 100 UHSUB8 ResultA, iPredA0, iPredB0 101 UHSUB8 ResultB, iPredA1, iPredB1 102 EOR ResultA, ResultA, r0x80808080 103 M_STR ResultA, [pDstPred], iDstStep 104 EOR ResultB, ResultB, r0x80808080 105 M_STR ResultB, [pDstPred], iDstStep 106 107 ;// 2nd load 108 M_LDR iPredA0, [pPred0], iPredStep0 109 M_LDR iPredB0, [pPred1] 110 M_LDR iPredA1, [pPred0], iPredStep0 111 M_LDR iPredB1, [pPred1, iPredStep1] 112 113 MVN iPredB0, iPredB0 
114 UHSUB8 ResultA, iPredA0, iPredB0 115 MVN iPredB1, iPredB1 116 UHSUB8 ResultB, iPredA1, iPredB1 117 EOR ResultA, ResultA, r0x80808080 118 M_STR ResultA, [pDstPred], iDstStep 119 EOR ResultB, ResultB, r0x80808080 120 M_STR ResultB, [pDstPred], iDstStep 121End0 122 M_END 123 124 ;// This function calculates average of 4x4 block 125 ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4 126 127 ;// Function header 128 M_START armVCM4P10_Average_4x4_Align2_unsafe, r6 129 130 ;// Code start 131 LDR r0x80808080, =0x80808080 132 133 ;// 1st load 134 LDR Temp1, [pPred0, #4] 135 M_LDR iPredA0, [pPred0], iPredStep0 136 M_LDR iPredB0, [pPred1] 137 M_LDR iPredB1, [pPred1, iPredStep1] 138 M_LDR Temp2, [pPred0, #4] 139 M_LDR iPredA1, [pPred0], iPredStep0 140 MVN iPredB0, iPredB0 141 MVN iPredB1, iPredB1 142 MOV iPredA0, iPredA0, LSR #16 143 ORR iPredA0, iPredA0, Temp1, LSL #16 144 MOV iPredA1, iPredA1, LSR #16 145 ORR iPredA1, iPredA1, Temp2, LSL #16 146 147 ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 148 UHSUB8 ResultA, iPredA0, iPredB0 149 UHSUB8 ResultB, iPredA1, iPredB1 150 EOR ResultA, ResultA, r0x80808080 151 M_STR ResultA, [pDstPred], iDstStep 152 EOR ResultB, ResultB, r0x80808080 153 M_STR ResultB, [pDstPred], iDstStep 154 155 ;// 2nd load 156 LDR Temp1, [pPred0, #4] 157 M_LDR iPredA0, [pPred0], iPredStep0 158 LDR iPredB0, [pPred1] 159 LDR iPredB1, [pPred1, iPredStep1] 160 LDR Temp2, [pPred0, #4] 161 M_LDR iPredA1, [pPred0], iPredStep0 162 MVN iPredB0, iPredB0 163 MVN iPredB1, iPredB1 164 MOV iPredA0, iPredA0, LSR #16 165 ORR iPredA0, iPredA0, Temp1, LSL #16 166 MOV iPredA1, iPredA1, LSR #16 167 ORR iPredA1, iPredA1, Temp2, LSL #16 168 169 UHSUB8 ResultA, iPredA0, iPredB0 170 UHSUB8 ResultB, iPredA1, iPredB1 171 EOR ResultA, ResultA, r0x80808080 172 M_STR ResultA, [pDstPred], iDstStep 173 EOR ResultB, ResultB, r0x80808080 174 M_STR ResultB, [pDstPred], iDstStep 175End2 176 M_END 177 178 179 ;// This function calculates average of 4x4 block 180 ;// 
pPred0 is at alignment offset 3 and pPred1 is alignment 4 181 182 ;// Function header 183 M_START armVCM4P10_Average_4x4_Align3_unsafe, r6 184 185 ;// Code start 186 LDR r0x80808080, =0x80808080 187 188 ;// 1st load 189 LDR Temp1, [pPred0, #4] 190 M_LDR iPredA0, [pPred0], iPredStep0 191 LDR iPredB0, [pPred1] 192 LDR iPredB1, [pPred1, iPredStep1] 193 LDR Temp2, [pPred0, #4] 194 M_LDR iPredA1, [pPred0], iPredStep0 195 196 MVN iPredB0, iPredB0 197 MVN iPredB1, iPredB1 198 MOV iPredA0, iPredA0, LSR #24 199 ORR iPredA0, iPredA0, Temp1, LSL #8 200 MOV iPredA1, iPredA1, LSR #24 201 ORR iPredA1, iPredA1, Temp2, LSL #8 202 UHSUB8 ResultA, iPredA0, iPredB0 203 UHSUB8 ResultB, iPredA1, iPredB1 204 EOR ResultA, ResultA, r0x80808080 205 M_STR ResultA, [pDstPred], iDstStep 206 EOR ResultB, ResultB, r0x80808080 207 M_STR ResultB, [pDstPred], iDstStep 208 209 ;// 2nd load 210 LDR Temp1, [pPred0, #4] 211 M_LDR iPredA0, [pPred0], iPredStep0 212 LDR iPredB0, [pPred1] 213 LDR iPredB1, [pPred1, iPredStep1] 214 LDR Temp2, [pPred0, #4] 215 M_LDR iPredA1, [pPred0], iPredStep0 216 217 MVN iPredB0, iPredB0 218 MVN iPredB1, iPredB1 219 MOV iPredA0, iPredA0, LSR #24 220 ORR iPredA0, iPredA0, Temp1, LSL #8 221 MOV iPredA1, iPredA1, LSR #24 222 ORR iPredA1, iPredA1, Temp2, LSL #8 223 224 UHSUB8 ResultA, iPredA0, iPredB0 225 UHSUB8 ResultB, iPredA1, iPredB1 226 EOR ResultA, ResultA, r0x80808080 227 M_STR ResultA, [pDstPred], iDstStep 228 EOR ResultB, ResultB, r0x80808080 229 M_STR ResultB, [pDstPred], iDstStep 230End3 231 M_END 232 233 ENDIF 234 235 END 236 237