armVCM4P10_Average_4x_Align_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe   (ALIGNMENT = 0, 2, 3)
;//
;// Each routine averages two 4x4 byte blocks with rounding,
;//     c = (a + b + 1) >> 1
;// one 4-byte row (one word) at a time.
;//
;// The first operand (a) starts ALIGNMENT bytes past the address held in r0.
;// The second operand (b) is read from the aligned location in r2, and the
;// averaged row is written back through the same register, so the second
;// operand is also the output.
;//
;// Input registers
;//     r0 - first operand pointer            (pPred0)
;//     r1 - first operand row step           (iPredStep0)
;//     r2 - aligned second operand / output  (pPred1, pDstPred)
;//     r3 - row step of the aligned location (iPredStep1, iDstStep)
;//
;// Registers preserved for the top level function
;//     r4,r5,r6,r8,r9,r14
;//     NOTE(review): r4, r5 and r14 are used as scratch below, so their
;//     preservation presumably relies on the M_START/M_END macros from
;//     armCOMM_s.h (invoked with "r6") - confirm against that header.
;//
;// Registers modified by the function
;//     r7,r10,r11,r12
;//
;// Output registers
;//     r2 - pointer to the aligned location
;//     r3 - step size to this aligned location
;//     As written, every load of the first operand post-indexes r0 and every
;//     store post-indexes r2, so both pointers are stepped once per row.

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

DEBUG_ON    SETL {FALSE}

;// Input register aliases
pPred0      RN 0
iPredStep0  RN 1
pPred1      RN 2
iPredStep1  RN 3
pDstPred    RN 2        ;// same register as pPred1: the block is averaged in place
iDstStep    RN 3        ;// same register as iPredStep1

;// Scratch register aliases
srcWord0    RN 10       ;// first-operand word, first row of the current pair
srcWord1    RN 11       ;// first-operand word, second row of the current pair
dstWord0    RN 12       ;// second-operand word, first row (inverted in place)
dstWord1    RN 14       ;// second-operand word, second row (inverted in place)
srcHigh0    RN 4        ;// word following srcWord0 (Align2/Align3 only)
srcHigh1    RN 5        ;// word following srcWord1 (Align2/Align3 only)
avg0        RN 5        ;// averaged first row; reuses r5 once srcHigh1 is consumed
avg1        RN 4        ;// averaged second row; reuses r4 once srcHigh0 is consumed
biasMask    RN 7        ;// 0x80 in every byte lane

    IF ARM1136JS

        ;//---------------------------------------------------------------
        ;// armVCM4P10_Average_4x4_Align0_unsafe
        ;//
        ;// First operand at byte offset 0: each row of it is a single
        ;// word load from pPred0.
        ;//
        ;// Averaging identity used by all three routines:
        ;//   (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// MVN forms 255-b in every byte, UHSUB8 is the per-byte halving
        ;// subtract, and EOR with 0x80808080 supplies the +128 per byte.
        ;//---------------------------------------------------------------
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        LDR         biasMask, =0x80808080

        ;// Rows 0 and 1: fetch both operands
        M_LDR       dstWord0, [pPred1]
        M_LDR       srcWord0, [pPred0], iPredStep0
        M_LDR       dstWord1, [pPred1, iPredStep1]
        M_LDR       srcWord1, [pPred0], iPredStep0

        ;// Rows 0 and 1: average and write back
        MVN         dstWord0, dstWord0
        MVN         dstWord1, dstWord1
        UHSUB8      avg0, srcWord0, dstWord0
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep

        ;// Rows 2 and 3: the two stores above moved pPred1/pDstPred on
        ;// by two rows, so the same addressing reaches the next pair
        M_LDR       srcWord0, [pPred0], iPredStep0
        M_LDR       dstWord0, [pPred1]
        M_LDR       srcWord1, [pPred0], iPredStep0
        M_LDR       dstWord1, [pPred1, iPredStep1]

        MVN         dstWord0, dstWord0
        UHSUB8      avg0, srcWord0, dstWord0
        MVN         dstWord1, dstWord1
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep
End0
        M_END

        ;//---------------------------------------------------------------
        ;// armVCM4P10_Average_4x4_Align2_unsafe
        ;//
        ;// First operand at byte offset 2: each row is assembled from two
        ;// consecutive words as (word0 >> 16) | (word1 << 16).
        ;// NOTE(review): this byte selection presumes little-endian data
        ;// and a word-aligned pPred0 - confirm against the caller.
        ;//---------------------------------------------------------------
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

        LDR         biasMask, =0x80808080

        ;// Rows 0 and 1: fetch both words of each first-operand row and
        ;// the matching second-operand rows
        LDR         srcHigh0, [pPred0, #4]
        M_LDR       srcWord0, [pPred0], iPredStep0
        M_LDR       dstWord0, [pPred1]
        M_LDR       dstWord1, [pPred1, iPredStep1]
        M_LDR       srcHigh1, [pPred0, #4]
        M_LDR       srcWord1, [pPred0], iPredStep0

        ;// Invert the second operand, then splice the first-operand rows
        MVN         dstWord0, dstWord0
        MVN         dstWord1, dstWord1
        MOV         srcWord0, srcWord0, LSR #16
        ORR         srcWord0, srcWord0, srcHigh0, LSL #16
        MOV         srcWord1, srcWord1, LSR #16
        ORR         srcWord1, srcWord1, srcHigh1, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8      avg0, srcWord0, dstWord0
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep

        ;// Rows 2 and 3
        LDR         srcHigh0, [pPred0, #4]
        M_LDR       srcWord0, [pPred0], iPredStep0
        LDR         dstWord0, [pPred1]
        LDR         dstWord1, [pPred1, iPredStep1]
        LDR         srcHigh1, [pPred0, #4]
        M_LDR       srcWord1, [pPred0], iPredStep0

        MVN         dstWord0, dstWord0
        MVN         dstWord1, dstWord1
        MOV         srcWord0, srcWord0, LSR #16
        ORR         srcWord0, srcWord0, srcHigh0, LSL #16
        MOV         srcWord1, srcWord1, LSR #16
        ORR         srcWord1, srcWord1, srcHigh1, LSL #16

        UHSUB8      avg0, srcWord0, dstWord0
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep
End2
        M_END

        ;//---------------------------------------------------------------
        ;// armVCM4P10_Average_4x4_Align3_unsafe
        ;//
        ;// First operand at byte offset 3: each row is assembled from two
        ;// consecutive words as (word0 >> 24) | (word1 << 8).
        ;// NOTE(review): this byte selection presumes little-endian data
        ;// and a word-aligned pPred0 - confirm against the caller.
        ;//---------------------------------------------------------------
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

        LDR         biasMask, =0x80808080

        ;// Rows 0 and 1
        LDR         srcHigh0, [pPred0, #4]
        M_LDR       srcWord0, [pPred0], iPredStep0
        LDR         dstWord0, [pPred1]
        LDR         dstWord1, [pPred1, iPredStep1]
        LDR         srcHigh1, [pPred0, #4]
        M_LDR       srcWord1, [pPred0], iPredStep0

        MVN         dstWord0, dstWord0
        MVN         dstWord1, dstWord1
        MOV         srcWord0, srcWord0, LSR #24
        ORR         srcWord0, srcWord0, srcHigh0, LSL #8
        MOV         srcWord1, srcWord1, LSR #24
        ORR         srcWord1, srcWord1, srcHigh1, LSL #8
        UHSUB8      avg0, srcWord0, dstWord0
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep

        ;// Rows 2 and 3
        LDR         srcHigh0, [pPred0, #4]
        M_LDR       srcWord0, [pPred0], iPredStep0
        LDR         dstWord0, [pPred1]
        LDR         dstWord1, [pPred1, iPredStep1]
        LDR         srcHigh1, [pPred0, #4]
        M_LDR       srcWord1, [pPred0], iPredStep0

        MVN         dstWord0, dstWord0
        MVN         dstWord1, dstWord1
        MOV         srcWord0, srcWord0, LSR #24
        ORR         srcWord0, srcWord0, srcHigh0, LSL #8
        MOV         srcWord1, srcWord1, LSR #24
        ORR         srcWord1, srcWord1, srcHigh1, LSL #8

        UHSUB8      avg0, srcWord0, dstWord0
        UHSUB8      avg1, srcWord1, dstWord1
        EOR         avg0, avg0, biasMask
        M_STR       avg0, [pDstPred], iDstStep
        EOR         avg1, avg1, biasMask
        M_STR       avg1, [pDstPred], iDstStep
End3
        M_END

    ENDIF

        END