/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vp8/common/filter.h"
#include "vpx_ports/asmdefs_mmi.h"

/* VP8 six-tap interpolation coefficients for the 8 sub-pel phases.
 * Each row holds the 6 taps of one phase; every tap is replicated 8
 * times so the asm can load broadcast copies of a tap with a single
 * 64-bit ldc1 (consecutive taps sit 0x10 bytes apart, offsets
 * 0x00..0x50).  Taps are Q7 fixed point: each row sums to 128, and the
 * rounding below adds 64 (0x0040) before the >> 7.
 * Phase 0 is {0,0,128,0,0,0}, i.e. a plain copy (see the filter0
 * variants further down). */
DECLARE_ALIGNED(8, static const int16_t, vp8_six_tap_mmi[8][6 * 8]) = {
  { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
  { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa,
    0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
    0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
  { 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
    0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5,
    0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c,
    0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024,
    0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8,
    0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 },
  { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7,
    0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
    0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032,
    0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
  { 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003,
    0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0,
    0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d,
    0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d,
    0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0,
    0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003 },
  { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa,
    0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032,
    0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
    0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 },
  { 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
    0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8,
    0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024,
    0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c,
    0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5,
    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002 },
  { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
    0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
    0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
    0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
};

/* Horizontal filter: pixel_step is 1, output_height and output_width are
   the size of horizontal filtering output, output_height is always H + 5 */
/* Filters one 4-pixel-wide column strip, one row per loop iteration.
 * Each iteration reads src_ptr[-2..+6] via the unaligned gsldl/gsldr
 * pair, applies the six taps (loaded from vp8_filter at 0x00..0x50),
 * rounds with +0x0040 and >> 7 (psrah by ftmp7 == 7), clamps to
 * [0,255] (packushb), and stores 4 results widened to uint16 (8 bytes)
 * at output_ptr.  output_ptr advances by output_width (elements of the
 * intermediate buffer row — callers pass n * 2), src_ptr by
 * src_pixels_per_line.
 * NOTE(review): the two dsrl-by-8 steps re-align src bytes so taps 2
 * and 3 see src[0..3] and src[1..4]; the O32 path uses even-numbered
 * FP registers because o32 pairs 32-bit FPRs for doubles. */
static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr,
                                             uint16_t *output_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned int output_height,
                                             unsigned int output_width,
                                             const int16_t *vp8_filter) {
  uint32_t tmp[1];
  DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };

#if _MIPS_SIM == _ABIO32
  register double fzero asm("$f0");
  register double ftmp0 asm("$f2");
  register double ftmp1 asm("$f4");
  register double ftmp2 asm("$f6");
  register double ftmp3 asm("$f8");
  register double ftmp4 asm("$f10");
  register double ftmp5 asm("$f12");
  register double ftmp6 asm("$f14");
  register double ftmp7 asm("$f16");
  register double ftmp8 asm("$f18");
  register double ftmp9 asm("$f20");
  register double ftmp10 asm("$f22");
  register double ftmp11 asm("$f24");
#else
  register double fzero asm("$f0");
  register double ftmp0 asm("$f1");
  register double ftmp1 asm("$f2");
  register double ftmp2 asm("$f3");
  register double ftmp3 asm("$f4");
  register double ftmp4 asm("$f5");
  register double ftmp5 asm("$f6");
  register double ftmp6 asm("$f7");
  register double ftmp7 asm("$f8");
  register double ftmp8 asm("$f9");
  register double ftmp9 asm("$f10");
  register double ftmp10 asm("$f11");
  register double ftmp11 asm("$f12");
#endif  // _MIPS_SIM == _ABIO32

  __asm__ volatile (
    /* ftmp0..ftmp5 <- taps 0..5 (4 broadcast copies each) */
    "ldc1        %[ftmp0],   0x00(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp1],   0x10(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp2],   0x20(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp3],   0x30(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp4],   0x40(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp5],   0x50(%[vp8_filter])        \n\t"
    "xor         %[fzero],   %[fzero],    %[fzero]      \n\t"
    "li          %[tmp0],    0x07                       \n\t"
    "mtc1        %[tmp0],    %[ftmp7]                   \n\t"  /* shift count: VP8 filter shift */
    "li          %[tmp0],    0x08                       \n\t"
    "mtc1        %[tmp0],    %[ftmp11]                  \n\t"  /* byte-shift count for dsrl */

    "1:                                                 \n\t"
    /* unaligned loads: ftmp9 = src[-2..5], ftmp10 = src[-1..6] */
    "gsldlc1     %[ftmp9],   0x05(%[src_ptr])           \n\t"
    "gsldrc1     %[ftmp9],  -0x02(%[src_ptr])           \n\t"
    "gsldlc1     %[ftmp10],  0x06(%[src_ptr])           \n\t"
    "gsldrc1     %[ftmp10], -0x01(%[src_ptr])           \n\t"

    /* tap0 * src[-2..1] */
    "punpcklbh   %[ftmp6],   %[ftmp9],    %[fzero]      \n\t"
    "pmullh      %[ftmp8],   %[ftmp6],    %[ftmp0]      \n\t"

    /* + tap4 * src[2..5] */
    "punpckhbh   %[ftmp6],   %[ftmp9],    %[fzero]      \n\t"
    "pmullh      %[ftmp6],   %[ftmp6],    %[ftmp4]      \n\t"
    "paddsh      %[ftmp8],   %[ftmp8],    %[ftmp6]      \n\t"

    /* + tap1 * src[-1..2] */
    "punpcklbh   %[ftmp6],   %[ftmp10],   %[fzero]      \n\t"
    "pmullh      %[ftmp6],   %[ftmp6],    %[ftmp1]      \n\t"
    "paddsh      %[ftmp8],   %[ftmp8],    %[ftmp6]      \n\t"

    /* + tap5 * src[3..6] */
    "punpckhbh   %[ftmp6],   %[ftmp10],   %[fzero]      \n\t"
    "pmullh      %[ftmp6],   %[ftmp6],    %[ftmp5]      \n\t"
    "paddsh      %[ftmp8],   %[ftmp8],    %[ftmp6]      \n\t"

    /* shift one byte: + tap2 * src[0..3] */
    "dsrl        %[ftmp10],  %[ftmp10],   %[ftmp11]     \n\t"
    "punpcklbh   %[ftmp6],   %[ftmp10],   %[fzero]      \n\t"
    "pmullh      %[ftmp6],   %[ftmp6],    %[ftmp2]      \n\t"
    "paddsh      %[ftmp8],   %[ftmp8],    %[ftmp6]      \n\t"

    /* shift another byte: + tap3 * src[1..4] */
    "dsrl        %[ftmp10],  %[ftmp10],   %[ftmp11]     \n\t"
    "punpcklbh   %[ftmp6],   %[ftmp10],   %[fzero]      \n\t"
    "pmullh      %[ftmp6],   %[ftmp6],    %[ftmp3]      \n\t"
    "paddsh      %[ftmp8],   %[ftmp8],    %[ftmp6]      \n\t"

    /* round (+64), >> 7, clamp to byte, re-widen to uint16 */
    "paddsh      %[ftmp8],   %[ftmp8],    %[ff_ph_40]   \n\t"
    "psrah       %[ftmp8],   %[ftmp8],    %[ftmp7]      \n\t"
    "packushb    %[ftmp8],   %[ftmp8],    %[fzero]      \n\t"
    "punpcklbh   %[ftmp8],   %[ftmp8],    %[fzero]      \n\t"
    "gssdlc1     %[ftmp8],   0x07(%[output_ptr])        \n\t"
    "gssdrc1     %[ftmp8],   0x00(%[output_ptr])        \n\t"

    "addiu       %[output_height], %[output_height], -0x01 \n\t"
    MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
    MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])
    "bnez        %[output_height], 1b                   \n\t"
    : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
      [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2),
      [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
      [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
      [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
      [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
      [ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]),
      [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height),
      [src_ptr]"+&r"(src_ptr)
    : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
      [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width),
      [ff_ph_40]"f"(ff_ph_40)
    : "memory"
  );
}

/* Vertical filter: pixel_step is always W (pixels_per_line).
 * (The original comment said "Horizontal"; this is the second-pass
 * vertical filter — it consumes the uint16 intermediate produced by
 * the horizontal pass above.)
 * Each iteration filters one output row of a 4-wide strip: it loads 6
 * consecutive uint16 rows (4 values each) at stride pixels_per_line,
 * multiplies by taps 0..5, rounds (+64, >> 7), clamps and stores 4
 * bytes at output_ptr.  src_ptr advances one row, output_ptr by
 * output_pitch, per iteration. */
static INLINE void vp8_filter_block1dc_v6_mmi(
    uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height,
    int output_pitch, unsigned int pixels_per_line, const int16_t *vp8_filter) {
  DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL };
  uint32_t tmp[1];
  mips_reg addr[1];
#if _MIPS_SIM == _ABIO32
  register double fzero asm("$f0");
  register double ftmp0 asm("$f2");
  register double ftmp1 asm("$f4");
  register double ftmp2 asm("$f6");
  register double ftmp3 asm("$f8");
  register double ftmp4 asm("$f10");
  register double ftmp5 asm("$f12");
  register double ftmp6 asm("$f14");
  register double ftmp7 asm("$f16");
  register double ftmp8 asm("$f18");
  register double ftmp9 asm("$f20");
  register double ftmp10 asm("$f22");
  register double ftmp11 asm("$f24");
  register double ftmp12 asm("$f26");
  register double ftmp13 asm("$f28");
#else
  register double fzero asm("$f0");
  register double ftmp0 asm("$f1");
  register double ftmp1 asm("$f2");
  register double ftmp2 asm("$f3");
  register double ftmp3 asm("$f4");
  register double ftmp4 asm("$f5");
  register double ftmp5 asm("$f6");
  register double ftmp6 asm("$f7");
  register double ftmp7 asm("$f8");
  register double ftmp8 asm("$f9");
  register double ftmp9 asm("$f10");
  register double ftmp10 asm("$f11");
  register double ftmp11 asm("$f12");
  register double ftmp12 asm("$f13");
  register double ftmp13 asm("$f14");
#endif  // _MIPS_SIM == _ABIO32

  __asm__ volatile (
    /* ftmp0..ftmp5 <- taps 0..5 */
    "ldc1        %[ftmp0],   0x00(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp1],   0x10(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp2],   0x20(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp3],   0x30(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp4],   0x40(%[vp8_filter])        \n\t"
    "ldc1        %[ftmp5],   0x50(%[vp8_filter])        \n\t"
    "xor         %[fzero],   %[fzero],    %[fzero]      \n\t"
    "li          %[tmp0],    0x07                       \n\t"
    "mtc1        %[tmp0],    %[ftmp13]                  \n\t"  /* shift count: VP8 filter shift */

    /* In order to make full use of memory load delay slot,
     * Operation of memory loading and calculating has been rearranged.
     */
    "1:                                                 \n\t"
    /* rows 0..2 relative to src_ptr */
    "gsldlc1     %[ftmp6],   0x07(%[src_ptr])           \n\t"
    "gsldrc1     %[ftmp6],   0x00(%[src_ptr])           \n\t"
    MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line])
    "gsldlc1     %[ftmp7],   0x07(%[addr0])             \n\t"
    "gsldrc1     %[ftmp7],   0x00(%[addr0])             \n\t"
    MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
    "gsldlc1     %[ftmp8],   0x07(%[addr0])             \n\t"
    "gsldrc1     %[ftmp8],   0x00(%[addr0])             \n\t"

    /* row 4; then advance src_ptr one row and load rows 3 and 5
     * (src_ptr now points at next iteration's row 0) */
    MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
    "gsldlc1     %[ftmp9],   0x07(%[addr0])             \n\t"
    "gsldrc1     %[ftmp9],   0x00(%[addr0])             \n\t"
    MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
    MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2])
    "gsldlc1     %[ftmp10],  0x07(%[addr0])             \n\t"
    "gsldrc1     %[ftmp10],  0x00(%[addr0])             \n\t"
    MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4])
    "gsldlc1     %[ftmp11],  0x07(%[addr0])             \n\t"
    "gsldrc1     %[ftmp11],  0x00(%[addr0])             \n\t"

    /* accumulate tap_i * row_i (ftmp9=row4*tap4, ftmp10=row3*tap3,
     * ftmp11=row5*tap5 because of the load reordering above) */
    "pmullh      %[ftmp12],  %[ftmp6],    %[ftmp0]      \n\t"

    "pmullh      %[ftmp7],   %[ftmp7],    %[ftmp1]      \n\t"
    "paddsh      %[ftmp12],  %[ftmp12],   %[ftmp7]      \n\t"

    "pmullh      %[ftmp8],   %[ftmp8],    %[ftmp2]      \n\t"
    "paddsh      %[ftmp12],  %[ftmp12],   %[ftmp8]      \n\t"

    "pmullh      %[ftmp9],   %[ftmp9],    %[ftmp4]      \n\t"
    "paddsh      %[ftmp12],  %[ftmp12],   %[ftmp9]      \n\t"

    "pmullh      %[ftmp10],  %[ftmp10],   %[ftmp3]      \n\t"
    "paddsh      %[ftmp12],  %[ftmp12],   %[ftmp10]     \n\t"

    "pmullh      %[ftmp11],  %[ftmp11],   %[ftmp5]      \n\t"
    "paddsh      %[ftmp12],  %[ftmp12],   %[ftmp11]     \n\t"

    /* round (+64), >> 7, clamp; store 4 output bytes */
    "paddsh      %[ftmp12],  %[ftmp12],   %[ff_ph_40]   \n\t"
    "psrah       %[ftmp12],  %[ftmp12],   %[ftmp13]     \n\t"
    "packushb    %[ftmp12],  %[ftmp12],   %[fzero]      \n\t"
    "gsswlc1     %[ftmp12],  0x03(%[output_ptr])        \n\t"
    "gsswrc1     %[ftmp12],  0x00(%[output_ptr])        \n\t"

    MMI_ADDIU(%[output_height], %[output_height], -0x01)
    MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
    "bnez        %[output_height], 1b                   \n\t"
    : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
      [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2),
      [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4),
      [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6),
      [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8),
      [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10),
      [ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12),
      [ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]),
      [addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr),
      [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
    : [pixels_per_line]"r"((mips_reg)pixels_per_line),
      [pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)),
      [pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)),
      [vp8_filter]"r"(vp8_filter),
      [output_pitch]"r"((mips_reg)output_pitch),
      [ff_ph_40]"f"(ff_ph_40)
    : "memory"
  );
}

/* When xoffset == 0 (or yoffset == 0), vp8_filter = {0,0,128,0,0,0}:
   the 6-tap filter degenerates to a copy, so vp8_filter_block1d_h6_mmi
   and vp8_filter_block1dc_v6_mmi can be simplified to the two
   copy-through variants below. */

/* Copy-through horizontal "filter": per row, zero-extends 4 source
 * bytes to uint16 and stores them (8 bytes) at output_ptr. */
static INLINE void vp8_filter_block1d_h6_filter0_mmi(
    unsigned char *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int output_height,
    unsigned int output_width) {
#if _MIPS_SIM == _ABIO32
  register double fzero asm("$f0");
  register double ftmp0 asm("$f2");
  register double ftmp1 asm("$f4");
#else
  register double fzero asm("$f0");
  register double ftmp0 asm("$f1");
  register double ftmp1 asm("$f2");
#endif  // _MIPS_SIM == _ABIO32

  __asm__ volatile (
    "xor         %[fzero],   %[fzero],    %[fzero]      \n\t"

    "1:                                                 \n\t"
    "gsldlc1     %[ftmp0],   0x07(%[src_ptr])           \n\t"
    "gsldrc1     %[ftmp0],   0x00(%[src_ptr])           \n\t"
    MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line])

    /* widen low 4 bytes to uint16 and store */
    "punpcklbh   %[ftmp1],   %[ftmp0],    %[fzero]      \n\t"
    "gssdlc1     %[ftmp1],   0x07(%[output_ptr])        \n\t"
    "gssdrc1     %[ftmp1],   0x00(%[output_ptr])        \n\t"

    "addiu       %[output_height], %[output_height], -0x01 \n\t"
    MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width])
    "bnez        %[output_height], 1b                   \n\t"
    : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
      [ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
      [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
    : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line),
      [output_width]"r"(output_width)
    : "memory"
  );
}

/* Copy-through vertical "filter": per row, narrows 4 uint16 values
 * back to 4 clamped bytes and stores them at output_ptr.  The caller
 * pre-offsets src_ptr by two rows (the tap-2 position). */
static INLINE void vp8_filter_block1dc_v6_filter0_mmi(
    uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height,
    int output_pitch, unsigned int pixels_per_line) {
#if _MIPS_SIM == _ABIO32
  register double fzero asm("$f0");
  register double ftmp0 asm("$f2");
  register double ftmp1 asm("$f4");
#else
  register double fzero asm("$f0");
  register double ftmp0 asm("$f1");
  register double ftmp1 asm("$f2");
#endif  // _MIPS_SIM == _ABIO32

  __asm__ volatile (
    "xor         %[fzero],   %[fzero],    %[fzero]      \n\t"

    "1:                                                 \n\t"
    "gsldlc1     %[ftmp0],   0x07(%[src_ptr])           \n\t"
    "gsldrc1     %[ftmp0],   0x00(%[src_ptr])           \n\t"
    MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line])
    MMI_ADDIU(%[output_height], %[output_height], -0x01)
    "packushb    %[ftmp1],   %[ftmp0],    %[fzero]      \n\t"
    "gsswlc1     %[ftmp1],   0x03(%[output_ptr])        \n\t"
    "gsswrc1     %[ftmp1],   0x00(%[output_ptr])        \n\t"

    MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch])
    "bnez        %[output_height], 1b                   \n\t"
    : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0),
      [ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr),
      [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height)
    : [pixels_per_line]"r"((mips_reg)pixels_per_line),
      [output_pitch]"r"((mips_reg)output_pitch)
    : "memory"
  );
}

/* Defines vp8_sixtap_predict{n}x{m}_mmi: two-pass separable six-tap
 * prediction.  Pass 1 filters (m + 5) rows horizontally (2 extra rows
 * above, 3 below, starting at src_ptr - 2 * src_pixels_per_line) into
 * the uint16 buffer FData2 (row stride n * 2 elements); pass 2 filters
 * FData2 vertically into dst_ptr.  Both passes process the block as
 * n / 4 strips of 4 pixels.  A zero offset selects the copy-through
 * variant; for yoffset == 0 the vertical copy starts at FData2 + n * 2,
 * i.e. skips the two context rows to land on the center (tap-2) row.
 * NOTE(review): FData2 is sized with n in both factors ((n + 5) rows);
 * this over-allocates when m < n (e.g. 8x4) — presumably intentional
 * headroom, verify before "fixing". */
#define sixtapNxM(n, m)                                                        \
  void vp8_sixtap_predict##n##x##m##_mmi(                                      \
      unsigned char *src_ptr, int src_pixels_per_line, int xoffset,            \
      int yoffset, unsigned char *dst_ptr, int dst_pitch) {                    \
    DECLARE_ALIGNED(16, uint16_t,                                              \
                    FData2[(n + 5) * (n == 16 ? 24 : (n == 8 ? 16 : n))]);     \
    const int16_t *HFilter, *VFilter;                                          \
    int i, loop = n / 4;                                                       \
    HFilter = vp8_six_tap_mmi[xoffset];                                        \
    VFilter = vp8_six_tap_mmi[yoffset];                                        \
                                                                               \
    if (xoffset == 0) {                                                        \
      for (i = 0; i < loop; ++i) {                                             \
        vp8_filter_block1d_h6_filter0_mmi(                                     \
            src_ptr - (2 * src_pixels_per_line) + i * 4, FData2 + i * 4,       \
            src_pixels_per_line, m + 5, n * 2);                                \
      }                                                                        \
    } else {                                                                   \
      for (i = 0; i < loop; ++i) {                                             \
        vp8_filter_block1d_h6_mmi(src_ptr - (2 * src_pixels_per_line) + i * 4, \
                                  FData2 + i * 4, src_pixels_per_line, m + 5,  \
                                  n * 2, HFilter);                             \
      }                                                                        \
    }                                                                          \
    if (yoffset == 0) {                                                        \
      for (i = 0; i < loop; ++i) {                                             \
        vp8_filter_block1dc_v6_filter0_mmi(                                    \
            FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, dst_pitch, n * 2);     \
      }                                                                        \
    } else {                                                                   \
      for (i = 0; i < loop; ++i) {                                             \
        vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m,         \
                                   dst_pitch, n * 2, VFilter);                 \
      }                                                                        \
    }                                                                          \
  }

sixtapNxM(4, 4);
sixtapNxM(8, 8);
sixtapNxM(8, 4);
sixtapNxM(16, 16);