1/* 2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "./vpx_dsp_rtcd.h" 12#include "vpx/vpx_integer.h" 13#include "vpx_ports/mem.h" 14#include "vpx_ports/asmdefs_mmi.h" 15 16void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff, 17 ptrdiff_t diff_stride, const uint8_t *src, 18 ptrdiff_t src_stride, const uint8_t *pred, 19 ptrdiff_t pred_stride) { 20 double ftmp[13]; 21 uint32_t tmp[1]; 22 23 if (rows == cols) { 24 switch (rows) { 25 case 4: 26 __asm__ volatile( 27 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 28#if _MIPS_SIM == _ABIO32 29 "ulw %[tmp0], 0x00(%[src]) \n\t" 30 "mtc1 %[tmp0], %[ftmp1] \n\t" 31 "ulw %[tmp0], 0x00(%[pred]) \n\t" 32 "mtc1 %[tmp0], %[ftmp2] \n\t" 33#else 34 "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" 35 "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" 36 "gslwlc1 %[ftmp2], 0x03(%[pred]) \n\t" 37 "gslwrc1 %[ftmp2], 0x00(%[pred]) \n\t" 38#endif 39 MMI_ADDU(%[src], %[src], %[src_stride]) 40 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 41 42#if _MIPS_SIM == _ABIO32 43 "ulw %[tmp0], 0x00(%[src]) \n\t" 44 "mtc1 %[tmp0], %[ftmp3] \n\t" 45 "ulw %[tmp0], 0x00(%[pred]) \n\t" 46 "mtc1 %[tmp0], %[ftmp4] \n\t" 47#else 48 "gslwlc1 %[ftmp3], 0x03(%[src]) \n\t" 49 "gslwrc1 %[ftmp3], 0x00(%[src]) \n\t" 50 "gslwlc1 %[ftmp4], 0x03(%[pred]) \n\t" 51 "gslwrc1 %[ftmp4], 0x00(%[pred]) \n\t" 52#endif 53 MMI_ADDU(%[src], %[src], %[src_stride]) 54 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 55 56#if _MIPS_SIM == _ABIO32 57 "ulw %[tmp0], 0x00(%[src]) \n\t" 58 "mtc1 %[tmp0], %[ftmp5] \n\t" 59 "ulw %[tmp0], 0x00(%[pred]) \n\t" 60 "mtc1 %[tmp0], %[ftmp6] \n\t" 61#else 62 "gslwlc1 %[ftmp5], 0x03(%[src]) \n\t" 63 "gslwrc1 %[ftmp5], 0x00(%[src]) \n\t" 64 "gslwlc1 %[ftmp6], 0x03(%[pred]) \n\t" 65 "gslwrc1 %[ftmp6], 0x00(%[pred]) \n\t" 66#endif 67 MMI_ADDU(%[src], %[src], %[src_stride]) 68 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 69 70#if _MIPS_SIM == _ABIO32 71 "ulw %[tmp0], 0x00(%[src]) \n\t" 72 "mtc1 %[tmp0], %[ftmp7] \n\t" 73 "ulw %[tmp0], 0x00(%[pred]) \n\t" 74 "mtc1 %[tmp0], %[ftmp8] \n\t" 75#else 76 "gslwlc1 %[ftmp7], 0x03(%[src]) \n\t" 77 "gslwrc1 %[ftmp7], 0x00(%[src]) \n\t" 78 "gslwlc1 %[ftmp8], 0x03(%[pred]) \n\t" 79 "gslwrc1 %[ftmp8], 0x00(%[pred]) \n\t" 80#endif 81 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" 82 "punpcklbh %[ftmp10], %[ftmp2], %[ftmp0] \n\t" 83 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" 84 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" 85 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" 86 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 87 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" 88 "punpcklbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t" 89 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" 90 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" 91 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" 92 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 93 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 94 "punpcklbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t" 95 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" 96 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" 97 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" 98 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 99 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" 100 "punpcklbh %[ftmp10], %[ftmp8], %[ftmp0] \n\t" 101 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" 102 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" 103 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" 104 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), 105 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), 106 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), 107 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), 108 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), 109 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), 110#if _MIPS_SIM == _ABIO32 111 [tmp0] "=&r"(tmp[0]), 112#endif 113 [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff) 114 : [src_stride] "r"((mips_reg)src_stride), 115 [pred_stride] "r"((mips_reg)pred_stride), 116 [diff_stride] "r"((mips_reg)(diff_stride * 2)) 117 : "memory"); 118 break; 119 case 8: 120 __asm__ volatile( 121 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 122 "li %[tmp0], 0x02 \n\t" 123 "1: \n\t" 124 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" 125 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" 126 "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t" 127 "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t" 128 MMI_ADDU(%[src], %[src], %[src_stride]) 129 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 130 "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t" 131 "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t" 132 "gsldlc1 %[ftmp4], 0x07(%[pred]) \n\t" 133 "gsldrc1 %[ftmp4], 0x00(%[pred]) \n\t" 134 MMI_ADDU(%[src], %[src], %[src_stride]) 135 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 136 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" 137 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" 138 "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t" 139 "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t" 140 MMI_ADDU(%[src], %[src], %[src_stride]) 141 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 142 "gsldlc1 %[ftmp7], 0x07(%[src]) \n\t" 143 "gsldrc1 %[ftmp7], 0x00(%[src]) \n\t" 144 "gsldlc1 %[ftmp8], 0x07(%[pred]) \n\t" 145 "gsldrc1 %[ftmp8], 0x00(%[pred]) \n\t" 146 MMI_ADDU(%[src], %[src], %[src_stride]) 147 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 148 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" 149 "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" 150 "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t" 151 "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t" 152 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 153 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 154 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 155 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 156 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 157 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 158 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 159 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" 160 "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t" 161 "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t" 162 "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t" 163 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 164 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 165 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 166 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 167 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 168 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 169 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 170 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 171 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 172 "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t" 173 "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" 174 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 175 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 176 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 177 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 178 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 179 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 180 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 181 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" 182 "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t" 183 "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t" 184 "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t" 185 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 186 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 187 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 188 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 189 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 190 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 191 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 192 "addiu %[tmp0], %[tmp0], -0x01 \n\t" 193 "bnez %[tmp0], 1b \n\t" 194 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), 195 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), 196 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), 197 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), 198 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), 199 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), 200 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src), 201 [pred] "+&r"(pred), [diff] "+&r"(diff) 202 : [pred_stride] "r"((mips_reg)pred_stride), 203 [src_stride] "r"((mips_reg)src_stride), 204 [diff_stride] "r"((mips_reg)(diff_stride * 2)) 205 : "memory"); 206 break; 207 case 16: 208 __asm__ volatile( 209 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 210 "li %[tmp0], 0x08 \n\t" 211 "1: \n\t" 212 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" 213 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" 214 "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t" 215 "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t" 216 "gsldlc1 %[ftmp3], 0x0f(%[src]) \n\t" 217 "gsldrc1 %[ftmp3], 0x08(%[src]) \n\t" 218 "gsldlc1 %[ftmp4], 0x0f(%[pred]) \n\t" 219 "gsldrc1 %[ftmp4], 0x08(%[pred]) \n\t" 220 MMI_ADDU(%[src], %[src], %[src_stride]) 221 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 222 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" 223 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" 224 "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t" 225 "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t" 226 "gsldlc1 %[ftmp7], 0x0f(%[src]) \n\t" 227 "gsldrc1 %[ftmp7], 0x08(%[src]) \n\t" 228 "gsldlc1 %[ftmp8], 0x0f(%[pred]) \n\t" 229 "gsldrc1 %[ftmp8], 0x08(%[pred]) \n\t" 230 MMI_ADDU(%[src], %[src], %[src_stride]) 231 MMI_ADDU(%[pred], %[pred], %[pred_stride]) 232 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" 233 "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" 234 "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t" 235 "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t" 236 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 237 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 238 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 239 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 240 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 241 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 242 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" 243 "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t" 244 "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t" 245 "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t" 246 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 247 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 248 "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t" 249 "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t" 250 "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t" 251 "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t" 252 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 253 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" 254 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" 255 "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t" 256 "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" 257 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 258 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 259 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" 260 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" 261 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" 262 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" 263 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" 264 "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t" 265 "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t" 266 "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t" 267 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" 268 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 269 "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t" 270 "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t" 271 "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t" 272 "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t" 273 MMI_ADDU(%[diff], %[diff], %[diff_stride]) 274 "addiu %[tmp0], %[tmp0], -0x01 \n\t" 275 "bnez %[tmp0], 1b \n\t" 276 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), 277 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), 278 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), 279 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), 280 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), 281 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), 282 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src), 283 [pred] "+&r"(pred), [diff] "+&r"(diff) 284 : [pred_stride] "r"((mips_reg)pred_stride), 285 [src_stride] "r"((mips_reg)src_stride), 286 [diff_stride] "r"((mips_reg)(diff_stride * 2)) 287 : "memory"); 288 break; 289 case 32: 290 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, 291 pred, pred_stride); 292 break; 293 case 64: 294 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, 295 pred, pred_stride); 296 break; 297 default: 298 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, 299 pred, pred_stride); 300 break; 301 } 302 } else { 303 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred, 304 pred_stride); 305 } 306} 307