1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Rescaling functions
11//
12// Author: Skal (pascal.massimino@gmail.com)
13
14#include <assert.h>
15
16#include "./dsp.h"
17#include "../utils/rescaler.h"
18
19//------------------------------------------------------------------------------
20// Implementations of critical functions ImportRow / ExportRow
21
// Half of the fixed-point unit; added before a right shift by
// WEBP_RESCALER_RFIX so that the result is rounded instead of truncated.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// Fixed-point product of 'x' and 'y': the multiplication is carried out in
// 64 bits, ROUNDER is added, and the sum is shifted down by WEBP_RESCALER_RFIX.
#define MULT_FIX(x, y) \
  (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
24
25//------------------------------------------------------------------------------
26// Row import
27
28void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
29  const int x_stride = wrk->num_channels;
30  const int x_out_max = wrk->dst_width * wrk->num_channels;
31  int channel;
32  assert(!WebPRescalerInputDone(wrk));
33  assert(wrk->x_expand);
34  for (channel = 0; channel < x_stride; ++channel) {
35    int x_in = channel;
36    int x_out = channel;
37    // simple bilinear interpolation
38    int accum = wrk->x_add;
39    int left = src[x_in];
40    int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
41    x_in += x_stride;
42    while (1) {
43      wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
44      x_out += x_stride;
45      if (x_out >= x_out_max) break;
46      accum -= wrk->x_sub;
47      if (accum < 0) {
48        left = right;
49        x_in += x_stride;
50        assert(x_in < wrk->src_width * x_stride);
51        right = src[x_in];
52        accum += wrk->x_add;
53      }
54    }
55    assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
56  }
57}
58
59void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
60  const int x_stride = wrk->num_channels;
61  const int x_out_max = wrk->dst_width * wrk->num_channels;
62  int channel;
63  assert(!WebPRescalerInputDone(wrk));
64  assert(!wrk->x_expand);
65  for (channel = 0; channel < x_stride; ++channel) {
66    int x_in = channel;
67    int x_out = channel;
68    uint32_t sum = 0;
69    int accum = 0;
70    while (x_out < x_out_max) {
71      uint32_t base = 0;
72      accum += wrk->x_add;
73      while (accum > 0) {
74        accum -= wrk->x_sub;
75        assert(x_in < wrk->src_width * x_stride);
76        base = src[x_in];
77        sum += base;
78        x_in += x_stride;
79      }
80      {        // Emit next horizontal pixel.
81        const rescaler_t frac = base * (-accum);
82        wrk->frow[x_out] = sum * wrk->x_sub - frac;
83        // fresh fractional start for next pixel
84        sum = (int)MULT_FIX(frac, wrk->fx_scale);
85      }
86      x_out += x_stride;
87    }
88    assert(accum == 0);
89  }
90}
91
92//------------------------------------------------------------------------------
93// Row export
94
95void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
96  int x_out;
97  uint8_t* const dst = wrk->dst;
98  rescaler_t* const irow = wrk->irow;
99  const int x_out_max = wrk->dst_width * wrk->num_channels;
100  const rescaler_t* const frow = wrk->frow;
101  assert(!WebPRescalerOutputDone(wrk));
102  assert(wrk->y_accum <= 0);
103  assert(wrk->y_expand);
104  assert(wrk->y_sub != 0);
105  if (wrk->y_accum == 0) {
106    for (x_out = 0; x_out < x_out_max; ++x_out) {
107      const uint32_t J = frow[x_out];
108      const int v = (int)MULT_FIX(J, wrk->fy_scale);
109      assert(v >= 0 && v <= 255);
110      dst[x_out] = v;
111    }
112  } else {
113    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
114    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
115    for (x_out = 0; x_out < x_out_max; ++x_out) {
116      const uint64_t I = (uint64_t)A * frow[x_out]
117                       + (uint64_t)B * irow[x_out];
118      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
119      const int v = (int)MULT_FIX(J, wrk->fy_scale);
120      assert(v >= 0 && v <= 255);
121      dst[x_out] = v;
122    }
123  }
124}
125
126void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
127  int x_out;
128  uint8_t* const dst = wrk->dst;
129  rescaler_t* const irow = wrk->irow;
130  const int x_out_max = wrk->dst_width * wrk->num_channels;
131  const rescaler_t* const frow = wrk->frow;
132  const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
133  assert(!WebPRescalerOutputDone(wrk));
134  assert(wrk->y_accum <= 0);
135  assert(!wrk->y_expand);
136  if (yscale) {
137    for (x_out = 0; x_out < x_out_max; ++x_out) {
138      const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
139      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
140      assert(v >= 0 && v <= 255);
141      dst[x_out] = v;
142      irow[x_out] = frac;   // new fractional start
143    }
144  } else {
145    for (x_out = 0; x_out < x_out_max; ++x_out) {
146      const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
147      assert(v >= 0 && v <= 255);
148      dst[x_out] = v;
149      irow[x_out] = 0;
150    }
151  }
152}
153
154#undef MULT_FIX
155#undef ROUNDER
156
157//------------------------------------------------------------------------------
158// Main entry calls
159
160void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
161  assert(!WebPRescalerInputDone(wrk));
162  if (!wrk->x_expand) {
163    WebPRescalerImportRowShrink(wrk, src);
164  } else {
165    WebPRescalerImportRowExpand(wrk, src);
166  }
167}
168
169void WebPRescalerExportRow(WebPRescaler* const wrk) {
170  if (wrk->y_accum <= 0) {
171    assert(!WebPRescalerOutputDone(wrk));
172    if (wrk->y_expand) {
173      WebPRescalerExportRowExpand(wrk);
174    } else if (wrk->fxy_scale) {
175      WebPRescalerExportRowShrink(wrk);
176    } else {  // very special case for src = dst = 1x1
177      int i;
178      assert(wrk->src_width == 1 && wrk->dst_width <= 2);
179      assert(wrk->src_height == 1 && wrk->dst_height == 1);
180      for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
181        wrk->dst[i] = wrk->irow[i];
182        wrk->irow[i] = 0;
183      }
184    }
185    wrk->y_accum += wrk->y_add;
186    wrk->dst += wrk->dst_stride;
187    ++wrk->dst_y;
188  }
189}
190
191//------------------------------------------------------------------------------
192
193WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
194WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
195
196WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
197WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
198
199extern void WebPRescalerDspInitSSE2(void);
200extern void WebPRescalerDspInitMIPS32(void);
201extern void WebPRescalerDspInitMIPSdspR2(void);
202extern void WebPRescalerDspInitNEON(void);
203
204static volatile VP8CPUInfo rescaler_last_cpuinfo_used =
205    (VP8CPUInfo)&rescaler_last_cpuinfo_used;
206
207WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
208  if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;
209
210  WebPRescalerImportRowExpand = WebPRescalerImportRowExpandC;
211  WebPRescalerImportRowShrink = WebPRescalerImportRowShrinkC;
212  WebPRescalerExportRowExpand = WebPRescalerExportRowExpandC;
213  WebPRescalerExportRowShrink = WebPRescalerExportRowShrinkC;
214
215  if (VP8GetCPUInfo != NULL) {
216#if defined(WEBP_USE_SSE2)
217    if (VP8GetCPUInfo(kSSE2)) {
218      WebPRescalerDspInitSSE2();
219    }
220#endif
221#if defined(WEBP_USE_NEON)
222    if (VP8GetCPUInfo(kNEON)) {
223      WebPRescalerDspInitNEON();
224    }
225#endif
226#if defined(WEBP_USE_MIPS32)
227    if (VP8GetCPUInfo(kMIPS32)) {
228      WebPRescalerDspInitMIPS32();
229    }
230#endif
231#if defined(WEBP_USE_MIPS_DSP_R2)
232    if (VP8GetCPUInfo(kMIPSdspR2)) {
233      WebPRescalerDspInitMIPSdspR2();
234    }
235#endif
236  }
237  rescaler_last_cpuinfo_used = VP8GetCPUInfo;
238}
239