// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

#include "./dsp.h"

#if defined(WEBP_USE_MIPS_DSP_R2)

#include "./lossless.h"

// Generates a color-map lookup function named FUNC_NAME operating on elements
// of type TYPE (instantiated below for uint32_t and uint8_t).
//
// For every row in [y_start, y_end) the generated function handles 'width'
// elements: groups of four in inline assembly, then the remaining (width & 3)
// elements in C through GET_VALUE(color_map[GET_INDEX(*src++)]).
//
// The assembler directive '.ifc' compares the stringified TYPE against
// "uint8_t" / "uint32_t", so only the matching load/store sequence is
// assembled:
//   - uint8_t : four byte loads give the indices; bits 8..15 of each
//               looked-up word are stored back as bytes.
//   - uint32_t: bits 8..15 of each loaded word are the index; the looked-up
//               word is stored unchanged.
// In both cases the index is scaled by 4 ('sll ..., 2') before the indexed
// word load 'lwx' from color_map.
//
// 'src' and 'dst' are advanced by the assembly and by the C tail loop and are
// never re-based per row: consecutive rows are read/written back to back.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
static void FUNC_NAME(const TYPE* src,                                         \
                      const uint32_t* const color_map,                         \
                      TYPE* dst, int y_start, int y_end,                       \
                      int width) {                                             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    /* Main loop: four elements per iteration. */                              \
    for (x = 0; x < (width >> 2); ++x) {                                       \
      int tmp1, tmp2, tmp3, tmp4;                                              \
      __asm__ volatile (                                                       \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "lbu %[tmp1], 0(%[src]) \n\t"                                          \
        "lbu %[tmp2], 1(%[src]) \n\t"                                          \
        "lbu %[tmp3], 2(%[src]) \n\t"                                          \
        "lbu %[tmp4], 3(%[src]) \n\t"                                          \
        "addiu %[src], %[src], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "lw %[tmp1], 0(%[src]) \n\t"                                           \
        "lw %[tmp2], 4(%[src]) \n\t"                                           \
        "lw %[tmp3], 8(%[src]) \n\t"                                           \
        "lw %[tmp4], 12(%[src]) \n\t"                                          \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "addiu %[src], %[src], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        "sll %[tmp1], %[tmp1], 2 \n\t"                                         \
        "sll %[tmp2], %[tmp2], 2 \n\t"                                         \
        "sll %[tmp3], %[tmp3], 2 \n\t"                                         \
        "sll %[tmp4], %[tmp4], 2 \n\t"                                         \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t"                              \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t"                              \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t"                              \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t"                              \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "sb %[tmp1], 0(%[dst]) \n\t"                                           \
        "sb %[tmp2], 1(%[dst]) \n\t"                                           \
        "sb %[tmp3], 2(%[dst]) \n\t"                                           \
        "sb %[tmp4], 3(%[dst]) \n\t"                                           \
        "addiu %[dst], %[dst], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "sw %[tmp1], 0(%[dst]) \n\t"                                           \
        "sw %[tmp2], 4(%[dst]) \n\t"                                           \
        "sw %[tmp3], 8(%[dst]) \n\t"                                           \
        "sw %[tmp4], 12(%[dst]) \n\t"                                          \
        "addiu %[dst], %[dst], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
        : [color_map]"r"(color_map)                                            \
        : "memory"                                                             \
      );                                                                       \
    }                                                                          \
    /* Tail: the remaining (width & 3) elements, one at a time in C. */        \
    for (x = 0; x < (width & 3); ++x) {                                        \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}

MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS

// Returns, for each of the four 8-bit channels packed in the arguments,
// c0 + c1 - c2 saturated to the range of an unsigned byte.
// The bytes are first widened to 16-bit lanes (right pair / left pair of each
// word), combined there, and finally narrowed back with saturation by the
// 'shll_s.ph ..., 7' + 'precrqu_s.qb.ph' pair.
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // Zero-extend the low ("qbr") and high ("qbl") byte pairs to halfwords.
    "preceu.ph.qbr %[temp1], %[c0] \n\t"
    "preceu.ph.qbl %[temp2], %[c0] \n\t"
    "preceu.ph.qbr %[temp3], %[c1] \n\t"
    "preceu.ph.qbl %[temp4], %[c1] \n\t"
    "preceu.ph.qbr %[temp5], %[c2] \n\t"
    "preceu.ph.qbl %[temp0], %[c2] \n\t"
    // c1 - c2, then c0 + (c1 - c2), per 16-bit lane.
    "subq.ph %[temp3], %[temp3], %[temp5] \n\t"
    "subq.ph %[temp4], %[temp4], %[temp0] \n\t"
    "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
    "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
    // Saturating narrow back to four bytes.
    "shll_s.ph %[temp1], %[temp1], 7 \n\t"
    "shll_s.ph %[temp2], %[temp2], 7 \n\t"
    "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp2;
}

// Per 8-bit channel: with ave = byte-wise average of c0 and c1 ('adduh.qb'),
// returns ave + (ave - c2) / 2 saturated to an unsigned byte.
// The halving adds the sign bit of each 16-bit difference before the
// arithmetic shift right by one, i.e. the division rounds toward zero.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    "adduh.qb %[temp5], %[c0], %[c1] \n\t"
    // Widen the average and c2 to 16-bit lanes.
    "preceu.ph.qbr %[temp3], %[c2] \n\t"
    "preceu.ph.qbr %[temp1], %[temp5] \n\t"
    "preceu.ph.qbl %[temp2], %[temp5] \n\t"
    "preceu.ph.qbl %[temp4], %[c2] \n\t"
    // diff = ave - c2
    "subq.ph %[temp3], %[temp1], %[temp3] \n\t"
    "subq.ph %[temp4], %[temp2], %[temp4] \n\t"
    // diff = (diff + sign_bit(diff)) >> 1
    "shrl.ph %[temp5], %[temp3], 15 \n\t"
    "shrl.ph %[temp0], %[temp4], 15 \n\t"
    "addq.ph %[temp3], %[temp3], %[temp5] \n\t"
    "addq.ph %[temp4], %[temp0], %[temp4] \n\t"
    "shra.ph %[temp3], %[temp3], 1 \n\t"
    "shra.ph %[temp4], %[temp4], 1 \n\t"
    // ave + diff, then saturating narrow back to four bytes.
    "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
    "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
    "shll_s.ph %[temp1], %[temp1], 7 \n\t"
    "shll_s.ph %[temp2], %[temp2], 7 \n\t"
    "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp1;
}

// Returns either 'a' or 'b' depending on their byte-wise distance to 'c':
// let pb = sum over the four bytes of |b - c| and pa = the same for |a - c|;
// the result is 'b' when pb - pa >= 1 and 'a' otherwise.
// Each absolute difference is built as max - min, with the max/min obtained
// from a byte-wise compare ('cmpgdu.lt.qb') followed by 'pick.qb'.
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // temp1 = max(b, c), temp2 = min(b, c), per byte.
    "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
    "pick.qb %[temp1], %[b], %[c] \n\t"
    "pick.qb %[temp2], %[c], %[b] \n\t"
    // temp4 = max(a, c), temp5 = min(a, c), per byte.
    "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
    "pick.qb %[temp4], %[a], %[c] \n\t"
    "pick.qb %[temp5], %[c], %[a] \n\t"
    "subu.qb %[temp3], %[temp1], %[temp2] \n\t"
    "subu.qb %[temp0], %[temp4], %[temp5] \n\t"
    // Horizontal sums of the four byte differences.
    "raddu.w.qb %[temp3], %[temp3] \n\t"
    "raddu.w.qb %[temp0], %[temp0] \n\t"
    "subu %[temp3], %[temp3], %[temp0] \n\t"
    // temp0 = (pb - pa < 1); 'movz' replaces a by b when temp0 is zero.
    "slti %[temp0], %[temp3], 0x1 \n\t"
    "movz %[a], %[b], %[temp0] \n\t"
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
      [a]"+&r"(a)
    : [b]"r"(b), [c]"r"(c)
  );
  return a;
}

// Byte-wise unsigned average of a0 and a1, computed by a single 'adduh.qb'
// (add the four unsigned bytes and shift each sum right by one).
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  __asm__ volatile (
    "adduh.qb %[a0], %[a0], %[a1] \n\t"
    : [a0]"+r"(a0)
    : [a1]"r"(a1)
  );
  return a0;
}

// Average2 of a0 and a2, then Average2 of that result with a1.
static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
  return Average2(Average2(a0, a2), a1);
}

// Average2 of the two pair-wise averages (a0, a1) and (a2, a3).
static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
                                     uint32_t a2, uint32_t a3) {
  return Average2(Average2(a0, a1), Average2(a2, a3));
}

// Predictors. Each takes the 'left' pixel and a pointer 'top' from which the
// entries top[-1], top[0] and top[1] may be read.

// Average3 of left, top[0] and top[1].
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  return Average3(left, top[0], top[1]);
}

// Average2 of left and top[-1].
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[-1]);
}

// Average2 of left and top[0].
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[0]);
}

// Average2 of top[-1] and top[0]; 'left' is unused.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}

// Average2 of top[0] and top[1]; 'left' is unused.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}

// Average4 of left, top[-1], top[0] and top[1].
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  return Average4(left, top[-1], top[0], top[1]);
}

// Select() between top[0] and left, using top[-1] as the reference.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  return Select(top[0], left, top[-1]);
}

// Per channel: left + top[0] - top[-1], saturated.
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractFull(left, top[0], top[-1]);
}

// Per channel: ave + (ave - top[-1]) / 2 with ave = average(left, top[0]),
// saturated.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractHalf(left, top[0], top[-1]);
}

// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
//
// Works in place on 'num_pixels' 32-bit pixels starting at 'data'.
// For each pixel, bits 8..15 (green) are extracted, replicated into both
// halfwords by 'replv.ph' and added byte-wise with 'addu.qb', so bytes 0 and 2
// receive +green modulo 256 while bytes 1 and 3 get +0.
// Loop "0:" handles four pixels per iteration up to p_loop1_end, loop "1:"
// handles the remaining pixels one at a time up to p_loop2_end.
// The code runs under '.set noreorder': the instruction written right after
// each branch (shown with a leading space) sits in the branch delay slot.
static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  uint32_t* const p_loop1_end = data + (num_pixels & ~3);
  uint32_t* const p_loop2_end = data + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // Skip the 4-pixel loop when there are fewer than four pixels.
    "beq %[data], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[data]) \n\t"
    "lw %[temp1], 4(%[data]) \n\t"
    "lw %[temp2], 8(%[data]) \n\t"
    "lw %[temp3], 12(%[data]) \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "ext %[temp5], %[temp1], 8, 8 \n\t"
    "ext %[temp6], %[temp2], 8, 8 \n\t"
    "ext %[temp7], %[temp3], 8, 8 \n\t"
    "addiu %[data], %[data], 16 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "replv.ph %[temp5], %[temp5] \n\t"
    "replv.ph %[temp6], %[temp6] \n\t"
    "replv.ph %[temp7], %[temp7] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
    "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
    "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
    // 'data' was already advanced, hence the negative store offsets.
    "sw %[temp0], -16(%[data]) \n\t"
    "sw %[temp1], -12(%[data]) \n\t"
    "sw %[temp2], -8(%[data]) \n\t"
    "bne %[data], %[p_loop1_end], 0b \n\t"
    " sw %[temp3], -4(%[data]) \n\t"
  "3: \n\t"
    // Skip the single-pixel loop when nothing is left.
    "beq %[data], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[data]) \n\t"
    "addiu %[data], %[data], 4 \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "bne %[data], %[p_loop2_end], 1b \n\t"
    " sw %[temp0], -4(%[data]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
      [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Applies the inverse color transform described by 'm' in place to
// 'num_pixels' pixels at 'data'.
//
// The assembly processes two pixels per iteration (up to p_loop_end); an odd
// trailing pixel is delegated to VP8LTransformColorInverse_C().
// Before the loop each multiplier is replicated into both halfwords and its
// low 8 bits are sign-extended ('shll.ph 8' + 'shra.ph 8'). Inside the loop,
// for both pixels at once:
//   red  += (sext8(green) * G_to_R) >> 5
//   blue += (sext8(green) * G_to_B) >> 5
//   blue += (sext8(new red) * R_to_B) >> 5
// Only the low byte of each result is written back, with byte stores at
// offsets 2 (red) and 0 (blue) of each pixel; the other two bytes of the pixel
// are never stored to. Those offsets match a little-endian word layout.
// 'mul.ph' is the reason for the "hi"/"lo" clobbers.
static void TransformColorInverse(const VP8LMultipliers* const m,
                                  uint32_t* data, int num_pixels) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red;
  const uint32_t G_to_R = m->green_to_red_;
  const uint32_t G_to_B = m->green_to_blue_;
  const uint32_t R_to_B = m->red_to_blue_;
  uint32_t* const p_loop_end = data + (num_pixels & ~1);
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[data], %[p_loop_end], 1f \n\t"
    " nop \n\t"
    // temp0/temp1/temp2 = sign-extended G_to_R/G_to_B/R_to_B in both lanes.
    "replv.ph %[temp0], %[G_to_R] \n\t"
    "replv.ph %[temp1], %[G_to_B] \n\t"
    "replv.ph %[temp2], %[R_to_B] \n\t"
    "shll.ph %[temp0], %[temp0], 8 \n\t"
    "shll.ph %[temp1], %[temp1], 8 \n\t"
    "shll.ph %[temp2], %[temp2], 8 \n\t"
    "shra.ph %[temp0], %[temp0], 8 \n\t"
    "shra.ph %[temp1], %[temp1], 8 \n\t"
    "shra.ph %[temp2], %[temp2], 8 \n\t"
  "0: \n\t"
    "lw %[argb], 0(%[data]) \n\t"
    "lw %[argb1], 4(%[data]) \n\t"
    "addiu %[data], %[data], 8 \n\t"
    // temp3 = sign-extended green of both pixels, one per halfword.
    "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
    "preceu.ph.qbra %[temp3], %[temp3] \n\t"
    "shll.ph %[temp3], %[temp3], 8 \n\t"
    "shra.ph %[temp3], %[temp3], 8 \n\t"
    "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
    "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
    // new_red = upper halfwords of both pixels, argb1 = lower halfwords.
    "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
    "ins %[argb1], %[argb], 16, 16 \n\t"
    "shra.ph %[temp5], %[temp5], 5 \n\t"
    "shra.ph %[temp3], %[temp3], 5 \n\t"
    "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
    "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
    // temp5 = low byte (updated red) of each halfword of new_red.
    "preceu.ph.qbra %[temp5], %[new_red] \n\t"
    "shll.ph %[temp4], %[temp5], 8 \n\t"
    "shra.ph %[temp4], %[temp4], 8 \n\t"
    "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
    // Red of the second pixel, then (after the shift) of the first one.
    "sb %[temp5], -2(%[data]) \n\t"
    "sra %[temp5], %[temp5], 16 \n\t"
    "shra.ph %[temp4], %[temp4], 5 \n\t"
    "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
    // temp3 = low byte (updated blue) of each halfword of argb1.
    "preceu.ph.qbra %[temp3], %[argb1] \n\t"
    "sb %[temp5], -6(%[data]) \n\t"
    "sb %[temp3], -4(%[data]) \n\t"
    "sra %[temp3], %[temp3], 16 \n\t"
    "bne %[data], %[p_loop_end], 0b \n\t"
    " sb %[temp3], -8(%[data]) \n\t"
  "1: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
      [argb1]"=&r"(argb1), [data]"+&r"(data)
    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
    : "memory", "hi", "lo"
  );

  // Fall-back to C-version for left-overs.
  if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel at
// 'dst'. Loop "0:" consumes four pixels (16 bytes) and emits 12 bytes with
// three unaligned word stores; loop "1:" handles the remaining pixels one at a
// time. In the single-pixel loop the byte taken from bits 16..23 of the source
// word is written first, followed by a halfword holding the byte-swapped low
// half of the word.
// NOTE(review): the resulting R,G,B byte order in memory relies on a
// little-endian target - confirm for big-endian builds.
static void ConvertBGRAToRGB(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    // Squeeze out the top byte of each pixel and reorder the rest so that
    // temp0, temp2, temp3 hold the 12 output bytes.
    "ins %[temp3], %[temp2], 24, 8 \n\t"
    "sll %[temp2], %[temp2], 8 \n\t"
    "rotr %[temp3], %[temp3], 16 \n\t"
    "ins %[temp2], %[temp1], 0, 16 \n\t"
    "sll %[temp1], %[temp1], 8 \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "balign %[temp0], %[temp1], 1 \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "rotr %[temp0], %[temp0], 16 \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "wsbh %[temp1], %[temp0] \n\t"
    // 'dst' is advanced first, hence the negative store offsets below.
    "addiu %[dst], %[dst], 3 \n\t"
    "ush %[temp1], -2(%[dst]) \n\t"
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -3(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 4 bytes per pixel at
// 'dst'. Each word is transformed by 'wsbh' (swap the bytes inside each
// halfword) followed by 'balign reg, reg, 1' on the same register, then stored
// with an unaligned word store. Four pixels per iteration in loop "0:", one
// per iteration in loop "1:".
// NOTE(review): the resulting R,G,B,A byte order in memory relies on a
// little-endian target - confirm for big-endian builds.
static void ConvertBGRAToRGBA(const uint32_t* src,
                              int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "balign %[temp1], %[temp1], 1 \n\t"
    "balign %[temp2], %[temp2], 1 \n\t"
    "balign %[temp3], %[temp3], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp2], 8(%[dst]) \n\t"
    "usw %[temp3], 12(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 16 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 4 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 2 bytes per pixel at
// 'dst', keeping the upper 4 bits of every 8-bit channel.
// For each word, bits 28..31 are copied into bits 0..3 and bits 12..15 into
// bits 16..19; 'precr.qb.ph' then keeps the low byte of each halfword, giving
// a 16-bit value whose nibbles come, from most to least significant, from
// source bits 20..23, 12..15, 4..7 and 28..31.
// When WEBP_SWAP_16BIT_CSP is not defined, 'wsbh' swaps the two bytes of each
// 16-bit value before it is stored.
// Four pixels per iteration in loop "0:", one per iteration in loop "1:".
static void ConvertBGRAToRGBA4444(const uint32_t* src,
                                  int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp1], 28, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp1], 12, 4 \n\t"
    "ins %[temp1], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp2], 28, 4 \n\t"
    "ins %[temp1], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp2], 12, 4 \n\t"
    "ins %[temp2], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp3], 28, 4 \n\t"
    "ins %[temp2], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp3], 12, 4 \n\t"
    "ins %[temp3], %[temp4], 0, 4 \n\t"
    // Pack two pixels per register: temp1 = pixels 1|0, temp3 = pixels 3|2.
    "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
    "ins %[temp3], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
#ifdef WEBP_SWAP_16BIT_CSP
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#else
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
#ifdef WEBP_SWAP_16BIT_CSP
    "ush %[temp0], 0(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "ush %[temp0], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 2 bytes per pixel at
// 'dst'. The 16-bit value built for each pixel holds, from most to least
// significant bit: source bits 19..23 (5 bits), source bits 10..15 (6 bits)
// and source bits 3..7 (5 bits).
// When WEBP_SWAP_16BIT_CSP is not defined, 'wsbh' swaps the two bytes of each
// 16-bit value before it is stored.
// Four pixels per iteration in loop "0:" (two pixels are merged per register
// with 'append ..., 16'), one per iteration in loop "1:".
static void ConvertBGRAToRGB565(const uint32_t* src,
                                int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // The registers are recycled across pixels: the packed results end up in
    // temp4 (pixel 0), temp0 (pixel 1), temp1 (pixel 2) and temp2 (pixel 3).
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp1], 5, 11 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
    "ext %[temp0], %[temp1], 8, 16 \n\t"
    "ext %[temp1], %[temp1], 3, 5 \n\t"
    "ins %[temp0], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp2], 5, 11 \n\t"
    "ins %[temp0], %[temp1], 0, 5 \n\t"
    "ext %[temp1], %[temp2], 8, 16 \n\t"
    "ext %[temp2], %[temp2], 3, 5 \n\t"
    "ins %[temp1], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp3], 5, 11 \n\t"
    "ins %[temp1], %[temp2], 0, 5 \n\t"
    "ext %[temp2], %[temp3], 8, 16 \n\t"
    "ext %[temp3], %[temp3], 3, 5 \n\t"
    "ins %[temp2], %[temp5], 0, 11 \n\t"
    "append %[temp0], %[temp4], 16 \n\t"
    "ins %[temp2], %[temp3], 0, 5 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "append %[temp2], %[temp1], 16 \n\t"
#ifdef WEBP_SWAP_16BIT_CSP
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
#ifdef WEBP_SWAP_16BIT_CSP
    "ush %[temp4], 0(%[dst]) \n\t"
#else
    "wsbh %[temp4], %[temp4] \n\t"
    "ush %[temp4], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel at
// 'dst' by dropping the top byte (bits 24..31) of every word and keeping the
// three low bytes in their original order.
// Loop "0:" packs four pixels into three words (12 bytes); loop "1:" handles
// the remaining pixels one at a time with a halfword store of bits 0..15
// followed by a byte store of bits 16..23.
// NOTE(review): the resulting B,G,R byte order in memory relies on a
// little-endian target - confirm for big-endian builds.
static void ConvertBGRAToBGR(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // Overwrite each top byte with data from the following pixel.
    "ins %[temp0], %[temp1], 24, 8 \n\t"
    "sra %[temp1], %[temp1], 8 \n\t"
    "ins %[temp1], %[temp2], 16, 16 \n\t"
    "sll %[temp2], %[temp2], 8 \n\t"
    "balign %[temp3], %[temp2], 1 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    // 'dst' is advanced first, hence the negative store offsets below.
    "addiu %[dst], %[dst], 3 \n\t"
    "ush %[temp0], -3(%[dst]) \n\t"
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -1(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

//------------------------------------------------------------------------------
// Entry point

extern void VP8LDspInitMIPSdspR2(void);

// Installs the MIPS DSP R2 implementations defined in this file into the
// VP8L function pointers / predictor table. Only predictor slots 5..13 are
// assigned here; the other slots are left untouched by this function.
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
  VP8LMapColor32b = MapARGB;
  VP8LMapColor8b = MapAlpha;
  VP8LPredictors[5] = Predictor5;
  VP8LPredictors[6] = Predictor6;
  VP8LPredictors[7] = Predictor7;
  VP8LPredictors[8] = Predictor8;
  VP8LPredictors[9] = Predictor9;
  VP8LPredictors[10] = Predictor10;
  VP8LPredictors[11] = Predictor11;
  VP8LPredictors[12] = Predictor12;
  VP8LPredictors[13] = Predictor13;
  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
  VP8LTransformColorInverse = TransformColorInverse;
  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
}

#else  // !WEBP_USE_MIPS_DSP_R2

WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)

#endif  // WEBP_USE_MIPS_DSP_R2