1a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Copyright 2012 Google Inc. All Rights Reserved. 2a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 30406ce1417f76f2034833414dcecc9f56253640cVikas Arora// Use of this source code is governed by a BSD-style license 40406ce1417f76f2034833414dcecc9f56253640cVikas Arora// that can be found in the COPYING file in the root of the source 50406ce1417f76f2034833414dcecc9f56253640cVikas Arora// tree. An additional intellectual property rights grant can be found 60406ce1417f76f2034833414dcecc9f56253640cVikas Arora// in the file PATENTS. All contributing project authors may 70406ce1417f76f2034833414dcecc9f56253640cVikas Arora// be found in the AUTHORS file in the root of the source tree. 8a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// ----------------------------------------------------------------------------- 9a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 10a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Rescaling functions 11a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 12a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Author: Skal (pascal.massimino@gmail.com) 13a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include <assert.h> 15a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include <stdlib.h> 16a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./rescaler.h" 17af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#include "../dsp/dsp.h" 18a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 19a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------ 20af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora// Implementations of critical functions ImportRow / ExportRow 21af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 22af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroravoid (*WebPRescalerImportRow)(WebPRescaler* const wrk, 23af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const uint8_t* const src, int channel) = NULL; 24af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroravoid (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL; 25a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 26a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#define RFIX 30 270406ce1417f76f2034833414dcecc9f56253640cVikas Arora#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) 28a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 29af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic void ImportRowC(WebPRescaler* const wrk, 30af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const uint8_t* const src, int channel) { 31a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int x_stride = wrk->num_channels; 32a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int x_out_max = wrk->dst_width * wrk->num_channels; 33a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int x_in = channel; 34a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int x_out; 35a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int accum = 0; 36a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora if (!wrk->x_expand) { 37a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int sum = 0; 38a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora for (x_out = channel; x_out < x_out_max; x_out += x_stride) { 39a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora accum += wrk->x_add; 40a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora for (; accum > 0; accum -= wrk->x_sub) { 41a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora sum += src[x_in]; 42a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora x_in += x_stride; 43a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 44a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora { // Emit next horizontal pixel. 45a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int32_t base = src[x_in]; 46a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int32_t frac = base * (-accum); 47a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora x_in += x_stride; 48a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac; 49a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora // fresh fractional start for next pixel 50a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora sum = (int)MULT_FIX(frac, wrk->fx_scale); 51a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 52a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 53a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } else { // simple bilinear interpolation 54a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int left = src[channel], right = src[channel]; 55a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora for (x_out = channel; x_out < x_out_max; x_out += x_stride) { 56a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora if (accum < 0) { 57a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora left = right; 58a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora x_in += x_stride; 59a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora right = src[x_in]; 60a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora accum += wrk->x_add; 61a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 62a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; 63a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora accum -= wrk->x_sub; 64a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 65a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 66af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // Accumulate the contribution of the new row. 67a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora for (x_out = channel; x_out < x_out_max; x_out += x_stride) { 68a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->irow[x_out] += wrk->frow[x_out]; 69a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 70a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 71a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 72af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic void ExportRowC(WebPRescaler* const wrk, int x_out) { 73a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora if (wrk->y_accum <= 0) { 74a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora uint8_t* const dst = wrk->dst; 75a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int32_t* const irow = wrk->irow; 76a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int32_t* const frow = wrk->frow; 77a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int yscale = wrk->fy_scale * (-wrk->y_accum); 78a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int x_out_max = wrk->dst_width * wrk->num_channels; 79af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora for (; x_out < x_out_max; ++x_out) { 80a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int frac = (int)MULT_FIX(frow[x_out], yscale); 81a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); 82a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 83a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora irow[x_out] = frac; // new fractional start 84a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 85a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->y_accum += wrk->y_add; 86a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->dst += wrk->dst_stride; 87af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 88af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 89af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 90af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora//------------------------------------------------------------------------------ 91af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora// MIPS version 92af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 93af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#if defined(WEBP_USE_MIPS32) 94af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 95af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic void ImportRowMIPS(WebPRescaler* const wrk, 96af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const uint8_t* const src, int channel) { 97af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_stride = wrk->num_channels; 98af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_out_max = wrk->dst_width * wrk->num_channels; 99af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int fx_scale = wrk->fx_scale; 100af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_add = wrk->x_add; 101af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_sub = wrk->x_sub; 102af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int* frow = wrk->frow + channel; 103af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int* irow = wrk->irow + channel; 104af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const uint8_t* src1 = src + channel; 105af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int temp1, temp2, temp3; 106af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int base, frac, sum; 107af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int accum, accum1; 108af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_stride1 = x_stride << 2; 109af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int loop_c = x_out_max - channel; 110af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 111af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (!wrk->x_expand) { 112af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm__ volatile ( 113af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[temp1], 0x8000 \n\t" 114af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[temp2], 0x10000 \n\t" 115af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[sum], 0 \n\t" 116af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[accum], 0 \n\t" 117af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "1: \n\t" 118af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[accum], %[accum], %[x_add] \n\t" 119af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "blez %[accum], 3f \n\t" 120af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "2: \n\t" 121af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lbu %[temp3], 0(%[src1]) \n\t" 122af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[accum], %[accum], %[x_sub] \n\t" 123af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[src1], %[src1], %[x_stride] \n\t" 124af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[sum], %[sum], %[temp3] \n\t" 125af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "bgtz %[accum], 2b \n\t" 126af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "3: \n\t" 127af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lbu %[base], 0(%[src1]) \n\t" 128af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[src1], %[src1], %[x_stride] \n\t" 129af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "negu %[accum1], %[accum] \n\t" 130af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mul %[frac], %[base], %[accum1] \n\t" 131af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[temp3], %[sum], %[base] \n\t" 132af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mul %[temp3], %[temp3], %[x_sub] \n\t" 133af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lw %[base], 0(%[irow]) \n\t" 134af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[loop_c], %[loop_c], %[x_stride] \n\t" 135af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sll %[accum1], %[frac], 2 \n\t" 136af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mult %[temp1], %[temp2] \n\t" 137af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "madd %[accum1], %[fx_scale] \n\t" 138af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mfhi %[sum] \n\t" 139af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[temp3], %[temp3], %[frac] \n\t" 140af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sw %[temp3], 0(%[frow]) \n\t" 141af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "add %[base], %[base], %[temp3] \n\t" 142af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sw %[base], 0(%[irow]) \n\t" 143af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[irow], %[irow], %[x_stride1] \n\t" 144af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[frow], %[frow], %[x_stride1] \n\t" 145af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "bgtz %[loop_c], 1b \n\t" 146af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 147af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3), 148af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac), 149af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1), 150af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [temp2] "=&r" (temp2), [temp1] "=&r" (temp1) 151af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale), 152af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [x_sub] "r" (x_sub), [x_add] "r" (x_add), 153af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1) 154af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : "memory", "hi", "lo" 155af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ); 156a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } else { 157af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm__ volatile ( 158af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lbu %[temp1], 0(%[src1]) \n\t" 159af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "move %[temp2], %[temp1] \n\t" 160af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[accum], 0 \n\t" 161af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "1: \n\t" 162af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "bgez %[accum], 2f \n\t" 163af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "move %[temp2], %[temp1] \n\t" 164af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[src1], %[x_stride] \n\t" 165af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lbu %[temp1], 0(%[src1]) \n\t" 166af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[accum], %[x_add] \n\t" 167af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "2: \n\t" 168af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[temp3], %[temp2], %[temp1] \n\t" 169af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mul %[temp3], %[temp3], %[accum] \n\t" 170af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mul %[base], %[temp1], %[x_add] \n\t" 171af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[accum], %[accum], %[x_sub] \n\t" 172af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lw %[frac], 0(%[irow]) \n\t" 173af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[loop_c], %[loop_c], %[x_stride] \n\t" 174af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[temp3], %[base], %[temp3] \n\t" 175af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sw %[temp3], 0(%[frow]) \n\t" 176af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[frow], %[x_stride1] \n\t" 177af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[frac], %[temp3] \n\t" 178af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sw %[frac], 0(%[irow]) \n\t" 179af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[irow], %[x_stride1] \n\t" 180af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "bgtz %[loop_c], 1b \n\t" 181af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 182af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), 183af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base), 184af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow) 185af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), 186af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c) 187af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : "memory", "hi", "lo" 188af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ); 189af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 190af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 191af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 192af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic void ExportRowMIPS(WebPRescaler* const wrk, int x_out) { 193af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (wrk->y_accum <= 0) { 194af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora uint8_t* const dst = wrk->dst; 195af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int32_t* const irow = wrk->irow; 196af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int32_t* const frow = wrk->frow; 197af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int yscale = wrk->fy_scale * (-wrk->y_accum); 198af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int x_out_max = wrk->dst_width * wrk->num_channels; 199af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // if wrk->fxy_scale can fit into 32 bits use optimized code, 200af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // otherwise use C code 201af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if ((wrk->fxy_scale >> 32) == 0) { 202af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end; 203af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int temp2 = (int)(wrk->fxy_scale); 204af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int temp8 = x_out_max << 2; 205af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora uint8_t* dst_t = (uint8_t*)dst; 206af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int32_t* irow_t = (int32_t*)irow; 207af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const int32_t* frow_t = (const int32_t*)frow; 208af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 209af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm__ volatile( 210af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addiu %[temp6], $zero, -256 \n\t" 211af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addiu %[temp7], $zero, 255 \n\t" 212af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[temp3], 0x10000 \n\t" 213af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "li %[temp4], 0x8000 \n\t" 214af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addu %[loop_end], %[frow_t], %[temp8] \n\t" 215af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "1: \n\t" 216af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lw %[temp0], 0(%[frow_t]) \n\t" 217af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mult %[temp3], %[temp4] \n\t" 218af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addiu %[frow_t], %[frow_t], 4 \n\t" 219af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sll %[temp0], %[temp0], 2 \n\t" 220af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "madd %[temp0], %[yscale] \n\t" 221af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mfhi %[temp1] \n\t" 222af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "lw %[temp0], 0(%[irow_t]) \n\t" 223af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addiu %[dst_t], %[dst_t], 1 \n\t" 224af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "addiu %[irow_t], %[irow_t], 4 \n\t" 225af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "subu %[temp0], %[temp0], %[temp1] \n\t" 226af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mult %[temp3], %[temp4] \n\t" 227af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sll %[temp0], %[temp0], 2 \n\t" 228af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "madd %[temp0], %[temp2] \n\t" 229af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "mfhi %[temp5] \n\t" 230af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sw %[temp1], -4(%[irow_t]) \n\t" 231af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "and %[temp0], %[temp5], %[temp6] \n\t" 232af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "slti %[temp1], %[temp5], 0 \n\t" 233af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "beqz %[temp0], 2f \n\t" 234af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "xor %[temp5], %[temp5], %[temp5] \n\t" 235af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "movz %[temp5], %[temp7], %[temp1] \n\t" 236af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "2: \n\t" 237af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "sb %[temp5], -1(%[dst_t]) \n\t" 238af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora "bne %[frow_t], %[loop_end], 1b \n\t" 239af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 240af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), 241af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), 242af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t), 243af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end) 244af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8) 245af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : "memory", "hi", "lo" 246af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ); 247af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->y_accum += wrk->y_add; 248af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->dst += wrk->dst_stride; 249af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } else { 250af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ExportRowC(wrk, x_out); 251af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 252af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 253af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 254af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#endif // WEBP_USE_MIPS32 255af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 256af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora//------------------------------------------------------------------------------ 257af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 258af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroravoid WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, 259af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora uint8_t* const dst, int dst_width, int dst_height, 260af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int dst_stride, int num_channels, int x_add, int x_sub, 261af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora int y_add, int y_sub, int32_t* const work) { 262af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->x_expand = (src_width < dst_width); 263af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->src_width = src_width; 264af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->src_height = src_height; 265af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->dst_width = dst_width; 266af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->dst_height = dst_height; 267af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->dst = dst; 268af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->dst_stride = dst_stride; 269af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->num_channels = num_channels; 270af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // for 'x_expand', we use bilinear interpolation 271af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; 272af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; 273af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->y_accum = y_add; 274af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->y_add = y_add; 275af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->y_sub = y_sub; 276af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->fx_scale = (1 << RFIX) / x_sub; 277af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->fy_scale = (1 << RFIX) / y_sub; 278af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->fxy_scale = wrk->x_expand ? 279af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ((int64_t)dst_height << RFIX) / (x_sub * src_height) : 280af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ((int64_t)dst_height << RFIX) / (x_add * src_height); 281af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->irow = work; 282af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora wrk->frow = work + num_channels * dst_width; 283af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 284af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (WebPRescalerImportRow == NULL) { 285af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora WebPRescalerImportRow = ImportRowC; 286af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora WebPRescalerExportRow = ExportRowC; 287af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (VP8GetCPUInfo != NULL) { 288af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#if defined(WEBP_USE_MIPS32) 289af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (VP8GetCPUInfo(kMIPS32)) { 290af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora WebPRescalerImportRow = ImportRowMIPS; 291af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora WebPRescalerExportRow = ExportRowMIPS; 292af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 293af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#endif 294af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 295a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 296a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 297a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 298a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#undef MULT_FIX 299a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#undef RFIX 300a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 301a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------ 302a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// all-in-one calls 303a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 3048b720228d581a84fd173b6dcb2fa295b59db489aVikas Aroraint WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { 3058b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub; 3068b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora return (num_lines > max_num_lines) ? max_num_lines : num_lines; 3078b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora} 3088b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora 309a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Aroraint WebPRescalerImport(WebPRescaler* const wrk, int num_lines, 310a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const uint8_t* src, int src_stride) { 311a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int total_imported = 0; 312a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora while (total_imported < num_lines && wrk->y_accum > 0) { 313a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int channel; 314a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora for (channel = 0; channel < wrk->num_channels; ++channel) { 315a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora WebPRescalerImportRow(wrk, src, channel); 316a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 317a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora src += src_stride; 318a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora ++total_imported; 319a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora wrk->y_accum -= wrk->y_sub; 320a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 321a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return total_imported; 322a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 323a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 324a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Aroraint WebPRescalerExport(WebPRescaler* const rescaler) { 325a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int total_exported = 0; 326a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora while (WebPRescalerHasPendingOutput(rescaler)) { 327af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora WebPRescalerExportRow(rescaler, 0); 328a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora ++total_exported; 329a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 330a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return total_exported; 331a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 332a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 333a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------ 334