// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Rescaling functions
//
// Author: Skal (pascal.massimino@gmail.com)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#include <assert.h>
#include <stdlib.h>
#include "./rescaler.h"
#include "../dsp/dsp.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------
205f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Implementations of critical functions ImportRow / ExportRow
215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                              const uint8_t* const src, int channel) = NULL;
245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Number of fractional bits carried by the fixed-point scale factors.
#define RFIX 30
// Fixed-point multiply: forms the 64-bit product of 'x' and 'y', adds half of
// (1 << RFIX) so the result is rounded, then shifts the RFIX fractional bits
// back out. The result has type int64_t (or wider, if 'y' is).
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static void ImportRowC(WebPRescaler* const wrk,
305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                       const uint8_t* const src, int channel) {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int x_stride = wrk->num_channels;
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int x_out_max = wrk->dst_width * wrk->num_channels;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int x_in = channel;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int x_out;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int accum = 0;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!wrk->x_expand) {
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int sum = 0;
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      accum += wrk->x_add;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (; accum > 0; accum -= wrk->x_sub) {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        sum += src[x_in];
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        x_in += x_stride;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      {        // Emit next horizontal pixel.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const int32_t base = src[x_in];
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const int32_t frac = base * (-accum);
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        x_in += x_stride;
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // fresh fractional start for next pixel
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        sum = (int)MULT_FIX(frac, wrk->fx_scale);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {        // simple bilinear interpolation
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int left = src[channel], right = src[channel];
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (accum < 0) {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        left = right;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        x_in += x_stride;
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        right = src[x_in];
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        accum += wrk->x_add;
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      accum -= wrk->x_sub;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // Accumulate the contribution of the new row.
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    wrk->irow[x_out] += wrk->frow[x_out];
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static void ExportRowC(WebPRescaler* const wrk, int x_out) {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (wrk->y_accum <= 0) {
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint8_t* const dst = wrk->dst;
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int32_t* const irow = wrk->irow;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int32_t* const frow = wrk->frow;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int yscale = wrk->fy_scale * (-wrk->y_accum);
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int x_out_max = wrk->dst_width * wrk->num_channels;
795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    for (; x_out < x_out_max; ++x_out) {
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const int frac = (int)MULT_FIX(frow[x_out], yscale);
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      irow[x_out] = frac;   // new fractional start
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    wrk->y_accum += wrk->y_add;
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    wrk->dst += wrk->dst_stride;
875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
//------------------------------------------------------------------------------
// MIPS version

#if defined(WEBP_USE_MIPS32)

// MIPS32 inline-assembly counterpart of ImportRowC.
// Rescales one channel of 'src' horizontally into wrk->frow and, in the same
// loop, adds the result into wrk->irow. Both asm loops test their counter at
// the bottom, so at least one output sample is always produced.
static void ImportRowMIPS(WebPRescaler* const wrk,
                          const uint8_t* const src, int channel) {
  const int x_stride = wrk->num_channels;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const int fx_scale = wrk->fx_scale;
  const int x_add = wrk->x_add;
  const int x_sub = wrk->x_sub;
  int* frow = wrk->frow + channel;
  int* irow = wrk->irow + channel;
  const uint8_t* src1 = src + channel;
  int temp1, temp2, temp3;
  int base, frac, sum;
  int accum, accum1;
  // Byte distance between consecutive samples of one channel in frow / irow.
  const int x_stride1 = x_stride << 2;
  // Remaining output indices; decremented by x_stride inside the asm loops.
  int loop_c = x_out_max - channel;

  if (!wrk->x_expand) {
    // Shrinking path.
    //   1: accum += x_add; skip to 3 when nothing is to be summed whole
    //   2: add whole input samples into 'sum' while accum > 0
    //   3: emit one output sample, add it into irow, and compute the rounded
    //      fixed-point carry for the next one. temp1 * temp2 preloads HI/LO
    //      with 2^31, the rounding term for a product pre-shifted left by 2,
    //      so that 'mfhi' yields the value shifted right by 30 bits.
    // 'loop_c' is written by the asm, hence it is a read-write ("+r") operand.
    __asm__ volatile (
      "li     %[temp1],   0x8000                    \n\t"
      "li     %[temp2],   0x10000                   \n\t"
      "li     %[sum],     0                         \n\t"
      "li     %[accum],   0                         \n\t"
    "1:                                             \n\t"
      "addu   %[accum],   %[accum],   %[x_add]      \n\t"
      "blez   %[accum],   3f                        \n\t"
    "2:                                             \n\t"
      "lbu    %[temp3],   0(%[src1])                \n\t"
      "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
      "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
      "addu   %[sum],     %[sum],     %[temp3]      \n\t"
      "bgtz   %[accum],   2b                        \n\t"
    "3:                                             \n\t"
      "lbu    %[base],    0(%[src1])                \n\t"
      "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
      "negu   %[accum1],  %[accum]                  \n\t"
      "mul    %[frac],    %[base],    %[accum1]     \n\t"
      "addu   %[temp3],   %[sum],     %[base]       \n\t"
      "mul    %[temp3],   %[temp3],   %[x_sub]      \n\t"
      "lw     %[base],    0(%[irow])                \n\t"
      "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
      "sll    %[accum1],  %[frac],    2             \n\t"
      "mult   %[temp1],   %[temp2]                  \n\t"
      "madd   %[accum1],  %[fx_scale]               \n\t"
      "mfhi   %[sum]                                \n\t"
      "subu   %[temp3],   %[temp3],   %[frac]       \n\t"
      "sw     %[temp3],   0(%[frow])                \n\t"
      "add    %[base],    %[base],    %[temp3]      \n\t"
      "sw     %[base],    0(%[irow])                \n\t"
      "addu   %[irow],    %[irow],    %[x_stride1]  \n\t"
      "addu   %[frow],    %[frow],    %[x_stride1]  \n\t"
      "bgtz   %[loop_c],  1b                        \n\t"

      : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
        [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
        [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),
        [temp2] "=&r" (temp2), [temp1] "=&r" (temp1),
        [loop_c] "+r" (loop_c)
      : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
        [x_sub] "r" (x_sub), [x_add] "r" (x_add),
        [x_stride1] "r" (x_stride1)
      : "memory", "hi", "lo"
    );
  } else {
    // Expanding path (bilinear): temp1 holds the 'right' sample and temp2 the
    // 'left' one. When accum < 0 the pair moves one input sample forward.
    // Each output is right * x_add + (left - right) * accum, stored to frow
    // and added into irow.
    // 'loop_c' is written by the asm, hence it is a read-write ("+r") operand.
    __asm__ volatile (
      "lbu    %[temp1],   0(%[src1])                \n\t"
      "move   %[temp2],   %[temp1]                  \n\t"
      "li     %[accum],   0                         \n\t"
    "1:                                             \n\t"
      "bgez   %[accum],   2f                        \n\t"
      "move   %[temp2],   %[temp1]                  \n\t"
      "addu   %[src1],    %[x_stride]               \n\t"
      "lbu    %[temp1],   0(%[src1])                \n\t"
      "addu   %[accum],   %[x_add]                  \n\t"
    "2:                                             \n\t"
      "subu   %[temp3],   %[temp2],   %[temp1]      \n\t"
      "mul    %[temp3],   %[temp3],   %[accum]      \n\t"
      "mul    %[base],    %[temp1],   %[x_add]      \n\t"
      "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
      "lw     %[frac],    0(%[irow])                \n\t"
      "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
      "addu   %[temp3],   %[base],    %[temp3]      \n\t"
      "sw     %[temp3],   0(%[frow])                \n\t"
      "addu   %[frow],    %[x_stride1]              \n\t"
      "addu   %[frac],    %[temp3]                  \n\t"
      "sw     %[frac],    0(%[irow])                \n\t"
      "addu   %[irow],    %[x_stride1]              \n\t"
      "bgtz   %[loop_c],  1b                        \n\t"

      : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
        [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),
        [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow),
        [loop_c] "+r" (loop_c)
      : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
        [x_stride1] "r" (x_stride1)
      : "memory", "hi", "lo"
    );
  }
}

// MIPS32 inline-assembly counterpart of ExportRowC.
// Does nothing unless wrk->y_accum <= 0. Indices from 'x_out' up to
// dst_width * num_channels are scaled, clamped to [0, 255] and written to
// wrk->dst; then wrk->dst and wrk->y_accum are advanced as in ExportRowC.
// Falls back to ExportRowC when wrk->fxy_scale does not fit the 32-bit signed
// multiply used by the asm.
static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {
  if (wrk->y_accum <= 0) {
    uint8_t* const dst = wrk->dst;
    int32_t* const irow = wrk->irow;
    const int32_t* const frow = wrk->frow;
    const int yscale = wrk->fy_scale * (-wrk->y_accum);
    const int x_out_max = wrk->dst_width * wrk->num_channels;
    // 'madd' is a signed 32x32 multiply, so the optimized code is only valid
    // when wrk->fxy_scale is non-negative and below 2^31 (bit 31 clear once
    // truncated to int); otherwise use the C code.
    if ((wrk->fxy_scale >> 31) == 0) {
      // The asm loop tests its end condition at the bottom, so it must not be
      // entered for an empty range.
      if (x_out < x_out_max) {
        int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
        const int temp2 = (int)(wrk->fxy_scale);
        // Byte length of the frow range still to be processed.
        const int temp8 = (x_out_max - x_out) << 2;
        uint8_t* dst_t = (uint8_t*)dst + x_out;
        int32_t* irow_t = (int32_t*)irow + x_out;
        const int32_t* frow_t = (const int32_t*)frow + x_out;

        // temp6 = ~0xff (out-of-range mask), temp7 = 255 (upper clamp).
        // temp3 * temp4 preloads HI/LO with 2^31, the rounding term for a
        // product pre-shifted left by 2; 'mfhi' then yields the value
        // shifted right by 30 bits.
        __asm__ volatile(
          "addiu    %[temp6],    $zero,       -256          \n\t"
          "addiu    %[temp7],    $zero,       255           \n\t"
          "li       %[temp3],    0x10000                    \n\t"
          "li       %[temp4],    0x8000                     \n\t"
          "addu     %[loop_end], %[frow_t],   %[temp8]      \n\t"
        "1:                                                 \n\t"
          "lw       %[temp0],    0(%[frow_t])               \n\t"
          "mult     %[temp3],    %[temp4]                   \n\t"
          "addiu    %[frow_t],   %[frow_t],   4             \n\t"
          "sll      %[temp0],    %[temp0],    2             \n\t"
          "madd     %[temp0],    %[yscale]                  \n\t"
          "mfhi     %[temp1]                                \n\t"
          "lw       %[temp0],    0(%[irow_t])               \n\t"
          "addiu    %[dst_t],    %[dst_t],    1             \n\t"
          "addiu    %[irow_t],   %[irow_t],   4             \n\t"
          "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"
          "mult     %[temp3],    %[temp4]                   \n\t"
          "sll      %[temp0],    %[temp0],    2             \n\t"
          "madd     %[temp0],    %[temp2]                   \n\t"
          "mfhi     %[temp5]                                \n\t"
          "sw       %[temp1],    -4(%[irow_t])              \n\t"
          "and      %[temp0],    %[temp5],    %[temp6]      \n\t"
          "slti     %[temp1],    %[temp5],    0             \n\t"
          "beqz     %[temp0],    2f                         \n\t"
          "xor      %[temp5],    %[temp5],    %[temp5]      \n\t"
          "movz     %[temp5],    %[temp7],    %[temp1]      \n\t"
        "2:                                                 \n\t"
          "sb       %[temp5],    -1(%[dst_t])               \n\t"
          "bne      %[frow_t],   %[loop_end], 1b            \n\t"

          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
            [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
            [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),
            [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)
          : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
          : "memory", "hi", "lo"
        );
      }
      wrk->y_accum += wrk->y_add;
      wrk->dst += wrk->dst_stride;
    } else {
      ExportRowC(wrk, x_out);
    }
  }
}
#endif   // WEBP_USE_MIPS32
2555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
//------------------------------------------------------------------------------
2575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
2585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
2595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                      uint8_t* const dst, int dst_width, int dst_height,
2605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                      int dst_stride, int num_channels, int x_add, int x_sub,
2615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                      int y_add, int y_sub, int32_t* const work) {
2625f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->x_expand = (src_width < dst_width);
2635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->src_width = src_width;
2645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->src_height = src_height;
2655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->dst_width = dst_width;
2665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->dst_height = dst_height;
2675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->dst = dst;
2685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->dst_stride = dst_stride;
2695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->num_channels = num_channels;
2705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // for 'x_expand', we use bilinear interpolation
2715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
2725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
2735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->y_accum = y_add;
2745f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->y_add = y_add;
2755f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->y_sub = y_sub;
2765f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->fx_scale = (1 << RFIX) / x_sub;
2775f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->fy_scale = (1 << RFIX) / y_sub;
2785f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->fxy_scale = wrk->x_expand ?
2795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
2805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      ((int64_t)dst_height << RFIX) / (x_add * src_height);
2815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->irow = work;
2825f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  wrk->frow = work + num_channels * dst_width;
2835f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
2845f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (WebPRescalerImportRow == NULL) {
2855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    WebPRescalerImportRow = ImportRowC;
2865f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    WebPRescalerExportRow = ExportRowC;
2875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (VP8GetCPUInfo != NULL) {
2885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if defined(WEBP_USE_MIPS32)
2895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (VP8GetCPUInfo(kMIPS32)) {
2905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)        WebPRescalerImportRow = ImportRowMIPS;
2915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)        WebPRescalerExportRow = ExportRowMIPS;
2925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      }
2935f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif
2945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// The fixed-point helpers are only needed by the row functions above.
#undef MULT_FIX
#undef RFIX
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// all-in-one calls
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
3055d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;
3065d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return (num_lines > max_num_lines) ? max_num_lines : num_lines;
3075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
3085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       const uint8_t* src, int src_stride) {
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int total_imported = 0;
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (total_imported < num_lines && wrk->y_accum > 0) {
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int channel;
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (channel = 0; channel < wrk->num_channels; ++channel) {
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      WebPRescalerImportRow(wrk, src, channel);
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    src += src_stride;
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++total_imported;
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    wrk->y_accum -= wrk->y_sub;
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return total_imported;
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int WebPRescalerExport(WebPRescaler* const rescaler) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int total_exported = 0;
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (WebPRescalerHasPendingOutput(rescaler)) {
3275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    WebPRescalerExportRow(rescaler, 0);
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++total_exported;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return total_exported;
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------