17c8da7ce66017295a65ec028084b90800be377f8James Zern// Copyright 2014 Google Inc. All Rights Reserved.
27c8da7ce66017295a65ec028084b90800be377f8James Zern//
37c8da7ce66017295a65ec028084b90800be377f8James Zern// Use of this source code is governed by a BSD-style license
47c8da7ce66017295a65ec028084b90800be377f8James Zern// that can be found in the COPYING file in the root of the source
57c8da7ce66017295a65ec028084b90800be377f8James Zern// tree. An additional intellectual property rights grant can be found
67c8da7ce66017295a65ec028084b90800be377f8James Zern// in the file PATENTS. All contributing project authors may
77c8da7ce66017295a65ec028084b90800be377f8James Zern// be found in the AUTHORS file in the root of the source tree.
87c8da7ce66017295a65ec028084b90800be377f8James Zern// -----------------------------------------------------------------------------
97c8da7ce66017295a65ec028084b90800be377f8James Zern//
107c8da7ce66017295a65ec028084b90800be377f8James Zern// MIPS version of rescaling functions
117c8da7ce66017295a65ec028084b90800be377f8James Zern//
127c8da7ce66017295a65ec028084b90800be377f8James Zern// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
137c8da7ce66017295a65ec028084b90800be377f8James Zern
147c8da7ce66017295a65ec028084b90800be377f8James Zern#include "./dsp.h"
157c8da7ce66017295a65ec028084b90800be377f8James Zern
167c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2)
177c8da7ce66017295a65ec028084b90800be377f8James Zern
187c8da7ce66017295a65ec028084b90800be377f8James Zern#include <assert.h>
19fa39824bb690c5806358871f46940d0450973d8aJames Zern#include "../utils/rescaler_utils.h"
207c8da7ce66017295a65ec028084b90800be377f8James Zern
// Fixed-point helpers (WEBP_RESCALER_ONE / WEBP_RESCALER_RFIX come from
// rescaler_utils.h and are not visible in this file).
// ROUNDER is half of WEBP_RESCALER_ONE; MULT_FIX(x, y) widens x to 64 bits,
// multiplies by y, adds ROUNDER and shifts right by WEBP_RESCALER_RFIX.
// NOTE(review): only x is cast. If y is a *signed* 32-bit value with its top
// bit set, it is sign-extended by the usual arithmetic conversions, so callers
// should pass unsigned operands for y.
217c8da7ce66017295a65ec028084b90800be377f8James Zern#define ROUNDER (WEBP_RESCALER_ONE >> 1)
227c8da7ce66017295a65ec028084b90800be377f8James Zern#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
237c8da7ce66017295a65ec028084b90800be377f8James Zern
247c8da7ce66017295a65ec028084b90800be377f8James Zern//------------------------------------------------------------------------------
257c8da7ce66017295a65ec028084b90800be377f8James Zern// Row export
267c8da7ce66017295a65ec028084b90800be377f8James Zern
// Exports one output row into wrk->dst for the vertical-shrink case
// (asserted: !wrk->y_expand, wrk->y_accum <= 0, wrk->fxy_scale != 0).
//
// For each of the dst_width * num_channels samples:
//   yscale != 0:  frac   = MULT_FIX(frow[i], yscale)
//                 dst[i] = MULT_FIX(irow[i] - frac, fxy_scale)
//                 irow[i] = frac          (new fractional start)
//   yscale == 0:  dst[i] = MULT_FIX(irow[i], fxy_scale)
//                 irow[i] = 0
// where yscale = fy_scale * (-y_accum).
//
// Samples are processed four at a time by the DSPr2 assembly (one accumulator
// $ac0..$ac3 per sample); the trailing (x_out_max & 3) samples are handled by
// the scalar loops.
//
// yscale is kept as uint32_t: the assembly consumes it with 'maddu' (unsigned
// 32x32 multiply-add), and the scalar tail must see the same unsigned value.
// As a signed int with bit 31 set it would be sign-extended when MULT_FIX
// widens the product to 64 bits, making the tail disagree with the asm path.
//
// NOTE(review): the assembly hard-codes the rounding term as
// 0x10000 * 0x8000 = 0x80000000 and keeps the HI word of the accumulator;
// this matches MULT_FIX only if WEBP_RESCALER_RFIX is 32 -- confirm against
// rescaler_utils.h.
277c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ExportRowShrink(WebPRescaler* const wrk) {
287c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
297c8da7ce66017295a65ec028084b90800be377f8James Zern  const int x_out_max = wrk->dst_width * wrk->num_channels;
307c8da7ce66017295a65ec028084b90800be377f8James Zern  uint8_t* dst = wrk->dst;
317c8da7ce66017295a65ec028084b90800be377f8James Zern  rescaler_t* irow = wrk->irow;
327c8da7ce66017295a65ec028084b90800be377f8James Zern  const rescaler_t* frow = wrk->frow;
337c8da7ce66017295a65ec028084b90800be377f8James Zern  const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
347c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
357c8da7ce66017295a65ec028084b90800be377f8James Zern  const int temp7 = (int)wrk->fxy_scale;
  // Byte length of the part handled by the asm loop: a multiple of four
  // samples, times 4 bytes per sample (the asm steps the row pointers by 16
  // bytes per group of four).
367c8da7ce66017295a65ec028084b90800be377f8James Zern  const int temp6 = (x_out_max & ~0x3) << 2;
377c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(!WebPRescalerOutputDone(wrk));
387c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(wrk->y_accum <= 0);
397c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(!wrk->y_expand);
407c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(wrk->fxy_scale != 0);
417c8da7ce66017295a65ec028084b90800be377f8James Zern  if (yscale) {
    // The asm loop tests its exit condition at the bottom, so it must only be
    // entered when at least one full group of four samples exists.
427c8da7ce66017295a65ec028084b90800be377f8James Zern    if (x_out_max >= 4) {
437c8da7ce66017295a65ec028084b90800be377f8James Zern      int temp8, temp9, temp10, temp11;
447c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
457c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp3],    0x10000                    \n\t"
467c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp4],    0x8000                     \n\t"
477c8da7ce66017295a65ec028084b90800be377f8James Zern        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
487c8da7ce66017295a65ec028084b90800be377f8James Zern      "1:                                                 \n\t"
497c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp0],    0(%[frow])                 \n\t"
507c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp1],    4(%[frow])                 \n\t"
517c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp2],    8(%[frow])                 \n\t"
527c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp5],    12(%[frow])                \n\t"
        // frac = HI(0x80000000 + frow[i] * yscale), one accumulator per sample.
537c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
547c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"
557c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
567c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
577c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
587c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
597c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
607c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
617c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[frow],     %[frow],     16            \n\t"
627c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp0],    $ac0                       \n\t"
637c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp1],    $ac1                       \n\t"
647c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp2],    $ac2                       \n\t"
657c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp5],    $ac3                       \n\t"
667c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp8],    0(%[irow])                 \n\t"
677c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp9],    4(%[irow])                 \n\t"
687c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp10],   8(%[irow])                 \n\t"
697c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp11],   12(%[irow])                \n\t"
        // dst/irow are advanced early; the stores below use negative offsets.
707c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[dst],      %[dst],      4             \n\t"
717c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[irow],     %[irow],     16            \n\t"
        // irow[i] - frac, then scaled by fxy_scale (temp7) with rounding.
727c8da7ce66017295a65ec028084b90800be377f8James Zern        "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"
737c8da7ce66017295a65ec028084b90800be377f8James Zern        "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
747c8da7ce66017295a65ec028084b90800be377f8James Zern        "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
757c8da7ce66017295a65ec028084b90800be377f8James Zern        "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
767c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
777c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"
787c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
797c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
807c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
817c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
827c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
837c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
847c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp8],    $ac0                       \n\t"
857c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp9],    $ac1                       \n\t"
867c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp10],   $ac2                       \n\t"
877c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp11],   $ac3                       \n\t"
        // Write frac back to irow and the low byte of each result to dst.
887c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       %[temp0],    -16(%[irow])               \n\t"
897c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       %[temp1],    -12(%[irow])               \n\t"
907c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       %[temp2],    -8(%[irow])                \n\t"
917c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       %[temp5],    -4(%[irow])                \n\t"
927c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp8],    -4(%[dst])                 \n\t"
937c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp9],    -3(%[dst])                 \n\t"
947c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp10],   -2(%[dst])                 \n\t"
957c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp11],   -1(%[dst])                 \n\t"
967c8da7ce66017295a65ec028084b90800be377f8James Zern        "bne      %[frow],     %[loop_end], 1b            \n\t"
977c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
987c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
997c8da7ce66017295a65ec028084b90800be377f8James Zern          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
1007c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
1017c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
1027c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
1037c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
1047c8da7ce66017295a65ec028084b90800be377f8James Zern          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
1057c8da7ce66017295a65ec028084b90800be377f8James Zern      );
1067c8da7ce66017295a65ec028084b90800be377f8James Zern    }
    // Scalar tail: frow/irow/dst were advanced by the asm above (if it ran).
1077c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (x_out_max & 0x3); ++i) {
1087c8da7ce66017295a65ec028084b90800be377f8James Zern      const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
1097c8da7ce66017295a65ec028084b90800be377f8James Zern      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
1107c8da7ce66017295a65ec028084b90800be377f8James Zern      assert(v >= 0 && v <= 255);
1117c8da7ce66017295a65ec028084b90800be377f8James Zern      *dst++ = v;
1127c8da7ce66017295a65ec028084b90800be377f8James Zern      *irow++ = frac;   // new fractional start
1137c8da7ce66017295a65ec028084b90800be377f8James Zern    }
1147c8da7ce66017295a65ec028084b90800be377f8James Zern  } else {
    // yscale == 0: no fractional carry-over; scale irow and reset it to zero.
1157c8da7ce66017295a65ec028084b90800be377f8James Zern    if (x_out_max >= 4) {
1167c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
1177c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp3],    0x10000                    \n\t"
1187c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp4],    0x8000                     \n\t"
1197c8da7ce66017295a65ec028084b90800be377f8James Zern        "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
1207c8da7ce66017295a65ec028084b90800be377f8James Zern      "1:                                                 \n\t"
1217c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp0],    0(%[irow])                 \n\t"
1227c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp1],    4(%[irow])                 \n\t"
1237c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp2],    8(%[irow])                 \n\t"
1247c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp5],    12(%[irow])                \n\t"
1257c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[dst],      %[dst],      4             \n\t"
1267c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[irow],     %[irow],     16            \n\t"
1277c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
1287c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
1297c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
1307c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
1317c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
1327c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
1337c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
1347c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
1357c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp0],    $ac0                       \n\t"
1367c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp1],    $ac1                       \n\t"
1377c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp2],    $ac2                       \n\t"
1387c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp5],    $ac3                       \n\t"
1397c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       $zero,       -16(%[irow])               \n\t"
1407c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       $zero,       -12(%[irow])               \n\t"
1417c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       $zero,       -8(%[irow])                \n\t"
1427c8da7ce66017295a65ec028084b90800be377f8James Zern        "sw       $zero,       -4(%[irow])                \n\t"
1437c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp0],    -4(%[dst])                 \n\t"
1447c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp1],    -3(%[dst])                 \n\t"
1457c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp2],    -2(%[dst])                 \n\t"
1467c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp5],    -1(%[dst])                 \n\t"
1477c8da7ce66017295a65ec028084b90800be377f8James Zern        "bne      %[irow],     %[loop_end], 1b            \n\t"
1487c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
1497c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
1507c8da7ce66017295a65ec028084b90800be377f8James Zern          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
1517c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp7]"r"(temp7), [temp6]"r"(temp6)
1527c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
1537c8da7ce66017295a65ec028084b90800be377f8James Zern          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
1547c8da7ce66017295a65ec028084b90800be377f8James Zern      );
1557c8da7ce66017295a65ec028084b90800be377f8James Zern    }
1567c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (x_out_max & 0x3); ++i) {
1577c8da7ce66017295a65ec028084b90800be377f8James Zern      const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
1587c8da7ce66017295a65ec028084b90800be377f8James Zern      assert(v >= 0 && v <= 255);
1597c8da7ce66017295a65ec028084b90800be377f8James Zern      *dst++ = v;
1607c8da7ce66017295a65ec028084b90800be377f8James Zern      *irow++ = 0;
1617c8da7ce66017295a65ec028084b90800be377f8James Zern    }
1627c8da7ce66017295a65ec028084b90800be377f8James Zern  }
1637c8da7ce66017295a65ec028084b90800be377f8James Zern}
1647c8da7ce66017295a65ec028084b90800be377f8James Zern
// Exports one output row into wrk->dst for the vertical-expand case
// (asserted: wrk->y_expand, wrk->y_accum <= 0, wrk->y_sub != 0).
//
// For each of the dst_width * num_channels samples:
//   y_accum == 0:  dst[i] = MULT_FIX(frow[i], fy_scale)
//   otherwise:     I      = A * frow[i] + B * irow[i]
//                  J      = (I + ROUNDER) >> WEBP_RESCALER_RFIX
//                  dst[i] = MULT_FIX(J, fy_scale)
//                  with B = WEBP_RESCALER_FRAC(-y_accum, y_sub) and
//                       A = (uint32_t)(WEBP_RESCALER_ONE - B).
//
// frow and irow are only read here; the only memory written is dst.
// Samples are processed four at a time by the DSPr2 assembly (one accumulator
// $ac0..$ac3 per sample); the trailing (x_out_max & 3) samples are handled by
// the scalar loops. Unlike the scalar loops, the assembly stores the low byte
// of each result without the 0..255 range assert.
//
// NOTE(review): the assembly hard-codes the rounding term as
// 0x10000 * 0x8000 = 0x80000000 and keeps the HI word of the accumulator;
// this matches MULT_FIX only if WEBP_RESCALER_RFIX is 32 -- confirm against
// rescaler_utils.h.
1657c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ExportRowExpand(WebPRescaler* const wrk) {
1667c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
1677c8da7ce66017295a65ec028084b90800be377f8James Zern  uint8_t* dst = wrk->dst;
1687c8da7ce66017295a65ec028084b90800be377f8James Zern  rescaler_t* irow = wrk->irow;
1697c8da7ce66017295a65ec028084b90800be377f8James Zern  const int x_out_max = wrk->dst_width * wrk->num_channels;
1707c8da7ce66017295a65ec028084b90800be377f8James Zern  const rescaler_t* frow = wrk->frow;
1717c8da7ce66017295a65ec028084b90800be377f8James Zern  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // Byte length of the part handled by the asm loop: a multiple of four
  // samples, times 4 bytes per sample (the asm steps the row pointers by 16
  // bytes per group of four).
1727c8da7ce66017295a65ec028084b90800be377f8James Zern  const int temp6 = (x_out_max & ~0x3) << 2;
  // fy_scale is handed to the asm as a plain 32-bit register value; the asm
  // multiplies it with 'maddu', i.e. as an unsigned quantity.
1737c8da7ce66017295a65ec028084b90800be377f8James Zern  const int temp7 = (int)wrk->fy_scale;
1747c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(!WebPRescalerOutputDone(wrk));
1757c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(wrk->y_accum <= 0);
1767c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(wrk->y_expand);
1777c8da7ce66017295a65ec028084b90800be377f8James Zern  assert(wrk->y_sub != 0);
1787c8da7ce66017295a65ec028084b90800be377f8James Zern  if (wrk->y_accum == 0) {
    // The asm loop tests its exit condition at the bottom, so it must only be
    // entered when at least one full group of four samples exists.
1797c8da7ce66017295a65ec028084b90800be377f8James Zern    if (x_out_max >= 4) {
1807c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
1817c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp4],    0x10000                    \n\t"
1827c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp5],    0x8000                     \n\t"
1837c8da7ce66017295a65ec028084b90800be377f8James Zern        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
1847c8da7ce66017295a65ec028084b90800be377f8James Zern      "1:                                                 \n\t"
1857c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp0],    0(%[frow])                 \n\t"
1867c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp1],    4(%[frow])                 \n\t"
1877c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp2],    8(%[frow])                 \n\t"
1887c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp3],    12(%[frow])                \n\t"
        // dst/frow are advanced early; the stores below use negative offsets.
1897c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[dst],      %[dst],      4             \n\t"
1907c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[frow],     %[frow],     16            \n\t"
        // Each accumulator: 0x80000000 + frow[i] * fy_scale; result is HI.
1917c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp4],    %[temp5]      \n\t"
1927c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
1937c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
1947c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
1957c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
1967c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
1977c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
1987c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
1997c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp0],    $ac0                       \n\t"
2007c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp1],    $ac1                       \n\t"
2017c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp2],    $ac2                       \n\t"
2027c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp3],    $ac3                       \n\t"
2037c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp0],    -4(%[dst])                 \n\t"
2047c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp1],    -3(%[dst])                 \n\t"
2057c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp2],    -2(%[dst])                 \n\t"
2067c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp3],    -1(%[dst])                 \n\t"
2077c8da7ce66017295a65ec028084b90800be377f8James Zern        "bne      %[frow],     %[loop_end], 1b            \n\t"
2087c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
2097c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
2107c8da7ce66017295a65ec028084b90800be377f8James Zern          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
2117c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp7]"r"(temp7), [temp6]"r"(temp6)
2127c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
2137c8da7ce66017295a65ec028084b90800be377f8James Zern          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
2147c8da7ce66017295a65ec028084b90800be377f8James Zern      );
2157c8da7ce66017295a65ec028084b90800be377f8James Zern    }
    // Scalar tail: frow/dst were advanced by the asm above (if it ran).
2167c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (x_out_max & 0x3); ++i) {
2177c8da7ce66017295a65ec028084b90800be377f8James Zern      const uint32_t J = *frow++;
2187c8da7ce66017295a65ec028084b90800be377f8James Zern      const int v = (int)MULT_FIX(J, wrk->fy_scale);
2197c8da7ce66017295a65ec028084b90800be377f8James Zern      assert(v >= 0 && v <= 255);
2207c8da7ce66017295a65ec028084b90800be377f8James Zern      *dst++ = v;
2217c8da7ce66017295a65ec028084b90800be377f8James Zern    }
2227c8da7ce66017295a65ec028084b90800be377f8James Zern  } else {
    // Blend weights: B applies to irow, A to frow (see the scalar tail below).
2237c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
2247c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
2257c8da7ce66017295a65ec028084b90800be377f8James Zern    if (x_out_max >= 4) {
2267c8da7ce66017295a65ec028084b90800be377f8James Zern      int temp8, temp9, temp10, temp11;
2277c8da7ce66017295a65ec028084b90800be377f8James Zern      __asm__ volatile (
2287c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp8],    0x10000                    \n\t"
2297c8da7ce66017295a65ec028084b90800be377f8James Zern        "li       %[temp9],    0x8000                     \n\t"
2307c8da7ce66017295a65ec028084b90800be377f8James Zern        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
2317c8da7ce66017295a65ec028084b90800be377f8James Zern      "1:                                                 \n\t"
2327c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp0],    0(%[frow])                 \n\t"
2337c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp1],    4(%[frow])                 \n\t"
2347c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp2],    8(%[frow])                 \n\t"
2357c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp3],    12(%[frow])                \n\t"
2367c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp4],    0(%[irow])                 \n\t"
2377c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp5],    4(%[irow])                 \n\t"
2387c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp10],   8(%[irow])                 \n\t"
2397c8da7ce66017295a65ec028084b90800be377f8James Zern        "lw       %[temp11],   12(%[irow])                \n\t"
2407c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[dst],      %[dst],      4             \n\t"
        // First pass: J = HI(0x80000000 + A * frow[i] + B * irow[i]).
2417c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
2427c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[A],        %[temp0]      \n\t"
2437c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[B],        %[temp4]      \n\t"
2447c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
2457c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[A],        %[temp1]      \n\t"
2467c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[B],        %[temp5]      \n\t"
2477c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
2487c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[A],        %[temp2]      \n\t"
2497c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[B],        %[temp10]     \n\t"
2507c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
2517c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[A],        %[temp3]      \n\t"
2527c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[B],        %[temp11]     \n\t"
2537c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[frow],     %[frow],     16            \n\t"
2547c8da7ce66017295a65ec028084b90800be377f8James Zern        "addiu    %[irow],     %[irow],     16            \n\t"
2557c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp0],    $ac0                       \n\t"
2567c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp1],    $ac1                       \n\t"
2577c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp2],    $ac2                       \n\t"
2587c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp3],    $ac3                       \n\t"
        // Second pass: result = HI(0x80000000 + J * fy_scale).
2597c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
2607c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
2617c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
2627c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
2637c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
2647c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
2657c8da7ce66017295a65ec028084b90800be377f8James Zern        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
2667c8da7ce66017295a65ec028084b90800be377f8James Zern        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
2677c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp0],    $ac0                       \n\t"
2687c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp1],    $ac1                       \n\t"
2697c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp2],    $ac2                       \n\t"
2707c8da7ce66017295a65ec028084b90800be377f8James Zern        "mfhi     %[temp3],    $ac3                       \n\t"
2717c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp0],    -4(%[dst])                 \n\t"
2727c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp1],    -3(%[dst])                 \n\t"
2737c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp2],    -2(%[dst])                 \n\t"
2747c8da7ce66017295a65ec028084b90800be377f8James Zern        "sb       %[temp3],    -1(%[dst])                 \n\t"
2757c8da7ce66017295a65ec028084b90800be377f8James Zern        "bne      %[frow],     %[loop_end], 1b            \n\t"
2767c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
2777c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
2787c8da7ce66017295a65ec028084b90800be377f8James Zern          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
2797c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
2807c8da7ce66017295a65ec028084b90800be377f8James Zern          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
2817c8da7ce66017295a65ec028084b90800be377f8James Zern        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
2827c8da7ce66017295a65ec028084b90800be377f8James Zern        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
2837c8da7ce66017295a65ec028084b90800be377f8James Zern          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
2847c8da7ce66017295a65ec028084b90800be377f8James Zern      );
2857c8da7ce66017295a65ec028084b90800be377f8James Zern    }
    // Scalar tail: frow/irow/dst were advanced by the asm above (if it ran).
2867c8da7ce66017295a65ec028084b90800be377f8James Zern    for (i = 0; i < (x_out_max & 0x3); ++i) {
2877c8da7ce66017295a65ec028084b90800be377f8James Zern      const uint64_t I = (uint64_t)A * *frow++
2887c8da7ce66017295a65ec028084b90800be377f8James Zern                       + (uint64_t)B * *irow++;
2897c8da7ce66017295a65ec028084b90800be377f8James Zern      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
2907c8da7ce66017295a65ec028084b90800be377f8James Zern      const int v = (int)MULT_FIX(J, wrk->fy_scale);
2917c8da7ce66017295a65ec028084b90800be377f8James Zern      assert(v >= 0 && v <= 255);
2927c8da7ce66017295a65ec028084b90800be377f8James Zern      *dst++ = v;
2937c8da7ce66017295a65ec028084b90800be377f8James Zern    }
2947c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2957c8da7ce66017295a65ec028084b90800be377f8James Zern}
2967c8da7ce66017295a65ec028084b90800be377f8James Zern
2977c8da7ce66017295a65ec028084b90800be377f8James Zern#undef MULT_FIX
2987c8da7ce66017295a65ec028084b90800be377f8James Zern#undef ROUNDER
2997c8da7ce66017295a65ec028084b90800be377f8James Zern
3007c8da7ce66017295a65ec028084b90800be377f8James Zern//------------------------------------------------------------------------------
3017c8da7ce66017295a65ec028084b90800be377f8James Zern// Entry point
3027c8da7ce66017295a65ec028084b90800be377f8James Zern
3037c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPRescalerDspInitMIPSdspR2(void);
3047c8da7ce66017295a65ec028084b90800be377f8James Zern
// Installs the MIPS DSPr2 row exporters defined in this file into the
// WebPRescalerExportRowShrink / WebPRescalerExportRowExpand hooks.
// The two assignments are independent of each other.
3057c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
3077c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPRescalerExportRowShrink = ExportRowShrink;
3067c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPRescalerExportRowExpand = ExportRowExpand;
3087c8da7ce66017295a65ec028084b90800be377f8James Zern}
3097c8da7ce66017295a65ec028084b90800be377f8James Zern
3107c8da7ce66017295a65ec028084b90800be377f8James Zern#else  // !WEBP_USE_MIPS_DSP_R2
3117c8da7ce66017295a65ec028084b90800be377f8James Zern
// Fallback when WEBP_USE_MIPS_DSP_R2 is not defined.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably
// it emits a do-nothing WebPRescalerDspInitMIPSdspR2 so the symbol still
// exists -- confirm against dsp.h.
3127c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
3137c8da7ce66017295a65ec028084b90800be377f8James Zern
3147c8da7ce66017295a65ec028084b90800be377f8James Zern#endif  // WEBP_USE_MIPS_DSP_R2
315