17c8da7ce66017295a65ec028084b90800be377f8James Zern// Copyright 2014 Google Inc. All Rights Reserved. 27c8da7ce66017295a65ec028084b90800be377f8James Zern// 37c8da7ce66017295a65ec028084b90800be377f8James Zern// Use of this source code is governed by a BSD-style license 47c8da7ce66017295a65ec028084b90800be377f8James Zern// that can be found in the COPYING file in the root of the source 57c8da7ce66017295a65ec028084b90800be377f8James Zern// tree. An additional intellectual property rights grant can be found 67c8da7ce66017295a65ec028084b90800be377f8James Zern// in the file PATENTS. All contributing project authors may 77c8da7ce66017295a65ec028084b90800be377f8James Zern// be found in the AUTHORS file in the root of the source tree. 87c8da7ce66017295a65ec028084b90800be377f8James Zern// ----------------------------------------------------------------------------- 97c8da7ce66017295a65ec028084b90800be377f8James Zern// 107c8da7ce66017295a65ec028084b90800be377f8James Zern// MIPS version of rescaling functions 117c8da7ce66017295a65ec028084b90800be377f8James Zern// 127c8da7ce66017295a65ec028084b90800be377f8James Zern// Author(s): Djordje Pesut (djordje.pesut@imgtec.com) 137c8da7ce66017295a65ec028084b90800be377f8James Zern 147c8da7ce66017295a65ec028084b90800be377f8James Zern#include "./dsp.h" 157c8da7ce66017295a65ec028084b90800be377f8James Zern 167c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2) 177c8da7ce66017295a65ec028084b90800be377f8James Zern 187c8da7ce66017295a65ec028084b90800be377f8James Zern#include <assert.h> 19fa39824bb690c5806358871f46940d0450973d8aJames Zern#include "../utils/rescaler_utils.h" 207c8da7ce66017295a65ec028084b90800be377f8James Zern 217c8da7ce66017295a65ec028084b90800be377f8James Zern#define ROUNDER (WEBP_RESCALER_ONE >> 1) 227c8da7ce66017295a65ec028084b90800be377f8James Zern#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX) 237c8da7ce66017295a65ec028084b90800be377f8James Zern 247c8da7ce66017295a65ec028084b90800be377f8James Zern//------------------------------------------------------------------------------ 257c8da7ce66017295a65ec028084b90800be377f8James Zern// Row export 267c8da7ce66017295a65ec028084b90800be377f8James Zern 277c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ExportRowShrink(WebPRescaler* const wrk) { 287c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 297c8da7ce66017295a65ec028084b90800be377f8James Zern const int x_out_max = wrk->dst_width * wrk->num_channels; 307c8da7ce66017295a65ec028084b90800be377f8James Zern uint8_t* dst = wrk->dst; 317c8da7ce66017295a65ec028084b90800be377f8James Zern rescaler_t* irow = wrk->irow; 327c8da7ce66017295a65ec028084b90800be377f8James Zern const rescaler_t* frow = wrk->frow; 337c8da7ce66017295a65ec028084b90800be377f8James Zern const int yscale = wrk->fy_scale * (-wrk->y_accum); 347c8da7ce66017295a65ec028084b90800be377f8James Zern int temp0, temp1, temp2, temp3, temp4, temp5, loop_end; 357c8da7ce66017295a65ec028084b90800be377f8James Zern const int temp7 = (int)wrk->fxy_scale; 367c8da7ce66017295a65ec028084b90800be377f8James Zern const int temp6 = (x_out_max & ~0x3) << 2; 377c8da7ce66017295a65ec028084b90800be377f8James Zern assert(!WebPRescalerOutputDone(wrk)); 387c8da7ce66017295a65ec028084b90800be377f8James Zern assert(wrk->y_accum <= 0); 397c8da7ce66017295a65ec028084b90800be377f8James Zern assert(!wrk->y_expand); 407c8da7ce66017295a65ec028084b90800be377f8James Zern assert(wrk->fxy_scale != 0); 417c8da7ce66017295a65ec028084b90800be377f8James Zern if (yscale) { 427c8da7ce66017295a65ec028084b90800be377f8James Zern if (x_out_max >= 4) { 437c8da7ce66017295a65ec028084b90800be377f8James Zern int temp8, temp9, temp10, temp11; 447c8da7ce66017295a65ec028084b90800be377f8James Zern __asm__ volatile ( 457c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp3], 0x10000 \n\t" 467c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp4], 0x8000 \n\t" 477c8da7ce66017295a65ec028084b90800be377f8James Zern "addu %[loop_end], %[frow], %[temp6] \n\t" 487c8da7ce66017295a65ec028084b90800be377f8James Zern "1: \n\t" 497c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp0], 0(%[frow]) \n\t" 507c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp1], 4(%[frow]) \n\t" 517c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp2], 8(%[frow]) \n\t" 527c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp5], 12(%[frow]) \n\t" 537c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp3], %[temp4] \n\t" 547c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[temp0], %[yscale] \n\t" 557c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp3], %[temp4] \n\t" 567c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[temp1], %[yscale] \n\t" 577c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp3], %[temp4] \n\t" 587c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[temp2], %[yscale] \n\t" 597c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp3], %[temp4] \n\t" 607c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[temp5], %[yscale] \n\t" 617c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[frow], %[frow], 16 \n\t" 627c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp0], $ac0 \n\t" 637c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp1], $ac1 \n\t" 647c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp2], $ac2 \n\t" 657c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp5], $ac3 \n\t" 667c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp8], 0(%[irow]) \n\t" 677c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp9], 4(%[irow]) \n\t" 687c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp10], 8(%[irow]) \n\t" 697c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp11], 12(%[irow]) \n\t" 707c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[dst], %[dst], 4 \n\t" 717c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[irow], %[irow], 16 \n\t" 727c8da7ce66017295a65ec028084b90800be377f8James Zern "subu %[temp8], %[temp8], %[temp0] \n\t" 737c8da7ce66017295a65ec028084b90800be377f8James Zern "subu %[temp9], %[temp9], %[temp1] \n\t" 747c8da7ce66017295a65ec028084b90800be377f8James Zern "subu %[temp10], %[temp10], %[temp2] \n\t" 757c8da7ce66017295a65ec028084b90800be377f8James Zern "subu %[temp11], %[temp11], %[temp5] \n\t" 767c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp3], %[temp4] \n\t" 777c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[temp8], %[temp7] \n\t" 787c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp3], %[temp4] \n\t" 797c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[temp9], %[temp7] \n\t" 807c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp3], %[temp4] \n\t" 817c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[temp10], %[temp7] \n\t" 827c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp3], %[temp4] \n\t" 837c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[temp11], %[temp7] \n\t" 847c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp8], $ac0 \n\t" 857c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp9], $ac1 \n\t" 867c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp10], $ac2 \n\t" 877c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp11], $ac3 \n\t" 887c8da7ce66017295a65ec028084b90800be377f8James Zern "sw %[temp0], -16(%[irow]) \n\t" 897c8da7ce66017295a65ec028084b90800be377f8James Zern "sw %[temp1], -12(%[irow]) \n\t" 907c8da7ce66017295a65ec028084b90800be377f8James Zern "sw %[temp2], -8(%[irow]) \n\t" 917c8da7ce66017295a65ec028084b90800be377f8James Zern "sw %[temp5], -4(%[irow]) \n\t" 927c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp8], -4(%[dst]) \n\t" 937c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp9], -3(%[dst]) \n\t" 947c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp10], -2(%[dst]) \n\t" 957c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp11], -1(%[dst]) \n\t" 967c8da7ce66017295a65ec028084b90800be377f8James Zern "bne %[frow], %[loop_end], 1b \n\t" 977c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 987c8da7ce66017295a65ec028084b90800be377f8James Zern [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 997c8da7ce66017295a65ec028084b90800be377f8James Zern [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end), 1007c8da7ce66017295a65ec028084b90800be377f8James Zern [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), 1017c8da7ce66017295a65ec028084b90800be377f8James Zern [temp11]"=&r"(temp11), [temp2]"=&r"(temp2) 1027c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6) 1037c8da7ce66017295a65ec028084b90800be377f8James Zern : "memory", "hi", "lo", "$ac1hi", "$ac1lo", 1047c8da7ce66017295a65ec028084b90800be377f8James Zern "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" 1057c8da7ce66017295a65ec028084b90800be377f8James Zern ); 1067c8da7ce66017295a65ec028084b90800be377f8James Zern } 1077c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < (x_out_max & 0x3); ++i) { 1087c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale); 1097c8da7ce66017295a65ec028084b90800be377f8James Zern const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale); 1107c8da7ce66017295a65ec028084b90800be377f8James Zern assert(v >= 0 && v <= 255); 1117c8da7ce66017295a65ec028084b90800be377f8James Zern *dst++ = v; 1127c8da7ce66017295a65ec028084b90800be377f8James Zern *irow++ = frac; // new fractional start 1137c8da7ce66017295a65ec028084b90800be377f8James Zern } 1147c8da7ce66017295a65ec028084b90800be377f8James Zern } else { 1157c8da7ce66017295a65ec028084b90800be377f8James Zern if (x_out_max >= 4) { 1167c8da7ce66017295a65ec028084b90800be377f8James Zern __asm__ volatile ( 1177c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp3], 0x10000 \n\t" 1187c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp4], 0x8000 \n\t" 1197c8da7ce66017295a65ec028084b90800be377f8James Zern "addu %[loop_end], %[irow], %[temp6] \n\t" 1207c8da7ce66017295a65ec028084b90800be377f8James Zern "1: \n\t" 1217c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp0], 0(%[irow]) \n\t" 1227c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp1], 4(%[irow]) \n\t" 1237c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp2], 8(%[irow]) \n\t" 1247c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp5], 12(%[irow]) \n\t" 1257c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[dst], %[dst], 4 \n\t" 1267c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[irow], %[irow], 16 \n\t" 1277c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp3], %[temp4] \n\t" 1287c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[temp0], %[temp7] \n\t" 1297c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp3], %[temp4] \n\t" 1307c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[temp1], %[temp7] \n\t" 1317c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp3], %[temp4] \n\t" 1327c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[temp2], %[temp7] \n\t" 1337c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp3], %[temp4] \n\t" 1347c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[temp5], %[temp7] \n\t" 1357c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp0], $ac0 \n\t" 1367c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp1], $ac1 \n\t" 1377c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp2], $ac2 \n\t" 1387c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp5], $ac3 \n\t" 1397c8da7ce66017295a65ec028084b90800be377f8James Zern "sw $zero, -16(%[irow]) \n\t" 1407c8da7ce66017295a65ec028084b90800be377f8James Zern "sw $zero, -12(%[irow]) \n\t" 1417c8da7ce66017295a65ec028084b90800be377f8James Zern "sw $zero, -8(%[irow]) \n\t" 1427c8da7ce66017295a65ec028084b90800be377f8James Zern "sw $zero, -4(%[irow]) \n\t" 1437c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp0], -4(%[dst]) \n\t" 1447c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp1], -3(%[dst]) \n\t" 1457c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp2], -2(%[dst]) \n\t" 1467c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp5], -1(%[dst]) \n\t" 1477c8da7ce66017295a65ec028084b90800be377f8James Zern "bne %[irow], %[loop_end], 1b \n\t" 1487c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 1497c8da7ce66017295a65ec028084b90800be377f8James Zern [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow), 1507c8da7ce66017295a65ec028084b90800be377f8James Zern [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2) 1517c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp7]"r"(temp7), [temp6]"r"(temp6) 1527c8da7ce66017295a65ec028084b90800be377f8James Zern : "memory", "hi", "lo", "$ac1hi", "$ac1lo", 1537c8da7ce66017295a65ec028084b90800be377f8James Zern "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" 1547c8da7ce66017295a65ec028084b90800be377f8James Zern ); 1557c8da7ce66017295a65ec028084b90800be377f8James Zern } 1567c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < (x_out_max & 0x3); ++i) { 1577c8da7ce66017295a65ec028084b90800be377f8James Zern const int v = (int)MULT_FIX(*irow, wrk->fxy_scale); 1587c8da7ce66017295a65ec028084b90800be377f8James Zern assert(v >= 0 && v <= 255); 1597c8da7ce66017295a65ec028084b90800be377f8James Zern *dst++ = v; 1607c8da7ce66017295a65ec028084b90800be377f8James Zern *irow++ = 0; 1617c8da7ce66017295a65ec028084b90800be377f8James Zern } 1627c8da7ce66017295a65ec028084b90800be377f8James Zern } 1637c8da7ce66017295a65ec028084b90800be377f8James Zern} 1647c8da7ce66017295a65ec028084b90800be377f8James Zern 1657c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ExportRowExpand(WebPRescaler* const wrk) { 1667c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 1677c8da7ce66017295a65ec028084b90800be377f8James Zern uint8_t* dst = wrk->dst; 1687c8da7ce66017295a65ec028084b90800be377f8James Zern rescaler_t* irow = wrk->irow; 1697c8da7ce66017295a65ec028084b90800be377f8James Zern const int x_out_max = wrk->dst_width * wrk->num_channels; 1707c8da7ce66017295a65ec028084b90800be377f8James Zern const rescaler_t* frow = wrk->frow; 1717c8da7ce66017295a65ec028084b90800be377f8James Zern int temp0, temp1, temp2, temp3, temp4, temp5, loop_end; 1727c8da7ce66017295a65ec028084b90800be377f8James Zern const int temp6 = (x_out_max & ~0x3) << 2; 1737c8da7ce66017295a65ec028084b90800be377f8James Zern const int temp7 = (int)wrk->fy_scale; 1747c8da7ce66017295a65ec028084b90800be377f8James Zern assert(!WebPRescalerOutputDone(wrk)); 1757c8da7ce66017295a65ec028084b90800be377f8James Zern assert(wrk->y_accum <= 0); 1767c8da7ce66017295a65ec028084b90800be377f8James Zern assert(wrk->y_expand); 1777c8da7ce66017295a65ec028084b90800be377f8James Zern assert(wrk->y_sub != 0); 1787c8da7ce66017295a65ec028084b90800be377f8James Zern if (wrk->y_accum == 0) { 1797c8da7ce66017295a65ec028084b90800be377f8James Zern if (x_out_max >= 4) { 1807c8da7ce66017295a65ec028084b90800be377f8James Zern __asm__ volatile ( 1817c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp4], 0x10000 \n\t" 1827c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp5], 0x8000 \n\t" 1837c8da7ce66017295a65ec028084b90800be377f8James Zern "addu %[loop_end], %[frow], %[temp6] \n\t" 1847c8da7ce66017295a65ec028084b90800be377f8James Zern "1: \n\t" 1857c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp0], 0(%[frow]) \n\t" 1867c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp1], 4(%[frow]) \n\t" 1877c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp2], 8(%[frow]) \n\t" 1887c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp3], 12(%[frow]) \n\t" 1897c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[dst], %[dst], 4 \n\t" 1907c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[frow], %[frow], 16 \n\t" 1917c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp4], %[temp5] \n\t" 1927c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[temp0], %[temp7] \n\t" 1937c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp4], %[temp5] \n\t" 1947c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[temp1], %[temp7] \n\t" 1957c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp4], %[temp5] \n\t" 1967c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[temp2], %[temp7] \n\t" 1977c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp4], %[temp5] \n\t" 1987c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[temp3], %[temp7] \n\t" 1997c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp0], $ac0 \n\t" 2007c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp1], $ac1 \n\t" 2017c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp2], $ac2 \n\t" 2027c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp3], $ac3 \n\t" 2037c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp0], -4(%[dst]) \n\t" 2047c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp1], -3(%[dst]) \n\t" 2057c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp2], -2(%[dst]) \n\t" 2067c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp3], -1(%[dst]) \n\t" 2077c8da7ce66017295a65ec028084b90800be377f8James Zern "bne %[frow], %[loop_end], 1b \n\t" 2087c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 2097c8da7ce66017295a65ec028084b90800be377f8James Zern [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 2107c8da7ce66017295a65ec028084b90800be377f8James Zern [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2) 2117c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp7]"r"(temp7), [temp6]"r"(temp6) 2127c8da7ce66017295a65ec028084b90800be377f8James Zern : "memory", "hi", "lo", "$ac1hi", "$ac1lo", 2137c8da7ce66017295a65ec028084b90800be377f8James Zern "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" 2147c8da7ce66017295a65ec028084b90800be377f8James Zern ); 2157c8da7ce66017295a65ec028084b90800be377f8James Zern } 2167c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < (x_out_max & 0x3); ++i) { 2177c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t J = *frow++; 2187c8da7ce66017295a65ec028084b90800be377f8James Zern const int v = (int)MULT_FIX(J, wrk->fy_scale); 2197c8da7ce66017295a65ec028084b90800be377f8James Zern assert(v >= 0 && v <= 255); 2207c8da7ce66017295a65ec028084b90800be377f8James Zern *dst++ = v; 2217c8da7ce66017295a65ec028084b90800be377f8James Zern } 2227c8da7ce66017295a65ec028084b90800be377f8James Zern } else { 2237c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); 2247c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); 2257c8da7ce66017295a65ec028084b90800be377f8James Zern if (x_out_max >= 4) { 2267c8da7ce66017295a65ec028084b90800be377f8James Zern int temp8, temp9, temp10, temp11; 2277c8da7ce66017295a65ec028084b90800be377f8James Zern __asm__ volatile ( 2287c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp8], 0x10000 \n\t" 2297c8da7ce66017295a65ec028084b90800be377f8James Zern "li %[temp9], 0x8000 \n\t" 2307c8da7ce66017295a65ec028084b90800be377f8James Zern "addu %[loop_end], %[frow], %[temp6] \n\t" 2317c8da7ce66017295a65ec028084b90800be377f8James Zern "1: \n\t" 2327c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp0], 0(%[frow]) \n\t" 2337c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp1], 4(%[frow]) \n\t" 2347c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp2], 8(%[frow]) \n\t" 2357c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp3], 12(%[frow]) \n\t" 2367c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp4], 0(%[irow]) \n\t" 2377c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp5], 4(%[irow]) \n\t" 2387c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp10], 8(%[irow]) \n\t" 2397c8da7ce66017295a65ec028084b90800be377f8James Zern "lw %[temp11], 12(%[irow]) \n\t" 2407c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[dst], %[dst], 4 \n\t" 2417c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp8], %[temp9] \n\t" 2427c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[A], %[temp0] \n\t" 2437c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[B], %[temp4] \n\t" 2447c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp8], %[temp9] \n\t" 2457c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[A], %[temp1] \n\t" 2467c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[B], %[temp5] \n\t" 2477c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp8], %[temp9] \n\t" 2487c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[A], %[temp2] \n\t" 2497c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[B], %[temp10] \n\t" 2507c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp8], %[temp9] \n\t" 2517c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[A], %[temp3] \n\t" 2527c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[B], %[temp11] \n\t" 2537c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[frow], %[frow], 16 \n\t" 2547c8da7ce66017295a65ec028084b90800be377f8James Zern "addiu %[irow], %[irow], 16 \n\t" 2557c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp0], $ac0 \n\t" 2567c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp1], $ac1 \n\t" 2577c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp2], $ac2 \n\t" 2587c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp3], $ac3 \n\t" 2597c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac0, %[temp8], %[temp9] \n\t" 2607c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac0, %[temp0], %[temp7] \n\t" 2617c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac1, %[temp8], %[temp9] \n\t" 2627c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac1, %[temp1], %[temp7] \n\t" 2637c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac2, %[temp8], %[temp9] \n\t" 2647c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac2, %[temp2], %[temp7] \n\t" 2657c8da7ce66017295a65ec028084b90800be377f8James Zern "mult $ac3, %[temp8], %[temp9] \n\t" 2667c8da7ce66017295a65ec028084b90800be377f8James Zern "maddu $ac3, %[temp3], %[temp7] \n\t" 2677c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp0], $ac0 \n\t" 2687c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp1], $ac1 \n\t" 2697c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp2], $ac2 \n\t" 2707c8da7ce66017295a65ec028084b90800be377f8James Zern "mfhi %[temp3], $ac3 \n\t" 2717c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp0], -4(%[dst]) \n\t" 2727c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp1], -3(%[dst]) \n\t" 2737c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp2], -2(%[dst]) \n\t" 2747c8da7ce66017295a65ec028084b90800be377f8James Zern "sb %[temp3], -1(%[dst]) \n\t" 2757c8da7ce66017295a65ec028084b90800be377f8James Zern "bne %[frow], %[loop_end], 1b \n\t" 2767c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 2777c8da7ce66017295a65ec028084b90800be377f8James Zern [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), 2787c8da7ce66017295a65ec028084b90800be377f8James Zern [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end), 2797c8da7ce66017295a65ec028084b90800be377f8James Zern [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), 2807c8da7ce66017295a65ec028084b90800be377f8James Zern [temp11]"=&r"(temp11), [temp2]"=&r"(temp2) 2817c8da7ce66017295a65ec028084b90800be377f8James Zern : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B) 2827c8da7ce66017295a65ec028084b90800be377f8James Zern : "memory", "hi", "lo", "$ac1hi", "$ac1lo", 2837c8da7ce66017295a65ec028084b90800be377f8James Zern "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" 2847c8da7ce66017295a65ec028084b90800be377f8James Zern ); 2857c8da7ce66017295a65ec028084b90800be377f8James Zern } 2867c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < (x_out_max & 0x3); ++i) { 2877c8da7ce66017295a65ec028084b90800be377f8James Zern const uint64_t I = (uint64_t)A * *frow++ 2887c8da7ce66017295a65ec028084b90800be377f8James Zern + (uint64_t)B * *irow++; 2897c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); 2907c8da7ce66017295a65ec028084b90800be377f8James Zern const int v = (int)MULT_FIX(J, wrk->fy_scale); 2917c8da7ce66017295a65ec028084b90800be377f8James Zern assert(v >= 0 && v <= 255); 2927c8da7ce66017295a65ec028084b90800be377f8James Zern *dst++ = v; 2937c8da7ce66017295a65ec028084b90800be377f8James Zern } 2947c8da7ce66017295a65ec028084b90800be377f8James Zern } 2957c8da7ce66017295a65ec028084b90800be377f8James Zern} 2967c8da7ce66017295a65ec028084b90800be377f8James Zern 2977c8da7ce66017295a65ec028084b90800be377f8James Zern#undef MULT_FIX 2987c8da7ce66017295a65ec028084b90800be377f8James Zern#undef ROUNDER 2997c8da7ce66017295a65ec028084b90800be377f8James Zern 3007c8da7ce66017295a65ec028084b90800be377f8James Zern//------------------------------------------------------------------------------ 3017c8da7ce66017295a65ec028084b90800be377f8James Zern// Entry point 3027c8da7ce66017295a65ec028084b90800be377f8James Zern 3037c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPRescalerDspInitMIPSdspR2(void); 3047c8da7ce66017295a65ec028084b90800be377f8James Zern 3057c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) { 3067c8da7ce66017295a65ec028084b90800be377f8James Zern WebPRescalerExportRowExpand = ExportRowExpand; 3077c8da7ce66017295a65ec028084b90800be377f8James Zern WebPRescalerExportRowShrink = ExportRowShrink; 3087c8da7ce66017295a65ec028084b90800be377f8James Zern} 3097c8da7ce66017295a65ec028084b90800be377f8James Zern 3107c8da7ce66017295a65ec028084b90800be377f8James Zern#else // !WEBP_USE_MIPS_DSP_R2 3117c8da7ce66017295a65ec028084b90800be377f8James Zern 3127c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2) 3137c8da7ce66017295a65ec028084b90800be377f8James Zern 3147c8da7ce66017295a65ec028084b90800be377f8James Zern#endif // WEBP_USE_MIPS_DSP_R2 315