1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <stdlib.h>
12#include <time.h>
13
14#include "../unit_test/unit_test.h"
15#include "libyuv/convert_argb.h"
16#include "libyuv/cpu_id.h"
17#include "libyuv/scale_argb.h"
18#include "libyuv/video_common.h"
19
20namespace libyuv {
21
// Compile-time "file:line" string builder.  FILELINESTR forwards its line
// argument to STRINGIZE, so a macro such as __LINE__ is expanded to its number
// before being turned into a string literal; the pieces are then joined by
// adjacent string literal concatenation.
#define STRINGIZE(token) #token
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
24
25// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
26static int ARGBTestFilter(int src_width,
27                          int src_height,
28                          int dst_width,
29                          int dst_height,
30                          FilterMode f,
31                          int benchmark_iterations,
32                          int disable_cpu_flags,
33                          int benchmark_cpu_info) {
34  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
35    return 0;
36  }
37
38  int i, j;
39  const int b = 0;  // 128 to test for padding/stride.
40  int64 src_argb_plane_size =
41      (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL;
42  int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
43
44  align_buffer_page_end(src_argb, src_argb_plane_size);
45  if (!src_argb) {
46    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
47    return 0;
48  }
49  MemRandomize(src_argb, src_argb_plane_size);
50
51  int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
52  int dst_stride_argb = (b * 2 + dst_width) * 4;
53
54  align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
55  align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
56  if (!dst_argb_c || !dst_argb_opt) {
57    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
58    return 0;
59  }
60  memset(dst_argb_c, 2, dst_argb_plane_size);
61  memset(dst_argb_opt, 3, dst_argb_plane_size);
62
63  // Warm up both versions for consistent benchmarks.
64  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
65  ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
66            src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
67            dst_stride_argb, dst_width, dst_height, f);
68  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
69  ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
70            src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4,
71            dst_stride_argb, dst_width, dst_height, f);
72
73  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
74  double c_time = get_time();
75  ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
76            src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
77            dst_stride_argb, dst_width, dst_height, f);
78
79  c_time = (get_time() - c_time);
80
81  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
82  double opt_time = get_time();
83  for (i = 0; i < benchmark_iterations; ++i) {
84    ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
85              src_width, src_height,
86              dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
87              dst_width, dst_height, f);
88  }
89  opt_time = (get_time() - opt_time) / benchmark_iterations;
90
91  // Report performance of C vs OPT
92  printf("filter %d - %8d us C - %8d us OPT\n", f,
93         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
94
95  // C version may be a little off from the optimized. Order of
96  //  operations may introduce rounding somewhere. So do a difference
97  //  of the buffers and look to see that the max difference isn't
98  //  over 2.
99  int max_diff = 0;
100  for (i = b; i < (dst_height + b); ++i) {
101    for (j = b * 4; j < (dst_width + b) * 4; ++j) {
102      int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
103                         dst_argb_opt[(i * dst_stride_argb) + j]);
104      if (abs_diff > max_diff) {
105        max_diff = abs_diff;
106      }
107    }
108  }
109
110  free_aligned_buffer_page_end(dst_argb_c);
111  free_aligned_buffer_page_end(dst_argb_opt);
112  free_aligned_buffer_page_end(src_argb);
113  return max_diff;
114}
115
116static const int kTileX = 8;
117static const int kTileY = 8;
118
119static int TileARGBScale(const uint8* src_argb,
120                         int src_stride_argb,
121                         int src_width,
122                         int src_height,
123                         uint8* dst_argb,
124                         int dst_stride_argb,
125                         int dst_width,
126                         int dst_height,
127                         FilterMode filtering) {
128  for (int y = 0; y < dst_height; y += kTileY) {
129    for (int x = 0; x < dst_width; x += kTileX) {
130      int clip_width = kTileX;
131      if (x + clip_width > dst_width) {
132        clip_width = dst_width - x;
133      }
134      int clip_height = kTileY;
135      if (y + clip_height > dst_height) {
136        clip_height = dst_height - y;
137      }
138      int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height,
139                            dst_argb, dst_stride_argb, dst_width, dst_height, x,
140                            y, clip_width, clip_height, filtering);
141      if (r) {
142        return r;
143      }
144    }
145  }
146  return 0;
147}
148
149static int ARGBClipTestFilter(int src_width,
150                              int src_height,
151                              int dst_width,
152                              int dst_height,
153                              FilterMode f,
154                              int benchmark_iterations) {
155  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156    return 0;
157  }
158
159  const int b = 128;
160  int64 src_argb_plane_size =
161      (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4;
162  int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
163
164  align_buffer_page_end(src_argb, src_argb_plane_size);
165  if (!src_argb) {
166    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
167    return 0;
168  }
169  memset(src_argb, 1, src_argb_plane_size);
170
171  int64 dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
172  int dst_stride_argb = (b * 2 + dst_width) * 4;
173
174  int i, j;
175  for (i = b; i < (Abs(src_height) + b); ++i) {
176    for (j = b; j < (Abs(src_width) + b) * 4; ++j) {
177      src_argb[(i * src_stride_argb) + j] = (fastrand() & 0xff);
178    }
179  }
180
181  align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
182  align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
183  if (!dst_argb_c || !dst_argb_opt) {
184    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
185    return 0;
186  }
187  memset(dst_argb_c, 2, dst_argb_plane_size);
188  memset(dst_argb_opt, 3, dst_argb_plane_size);
189
190  // Do full image, no clipping.
191  double c_time = get_time();
192  ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
193            src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
194            dst_stride_argb, dst_width, dst_height, f);
195  c_time = (get_time() - c_time);
196
197  // Do tiled image, clipping scale to a tile at a time.
198  double opt_time = get_time();
199  for (i = 0; i < benchmark_iterations; ++i) {
200    TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
201                  src_width, src_height,
202                  dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
203                  dst_width, dst_height, f);
204  }
205  opt_time = (get_time() - opt_time) / benchmark_iterations;
206
207  // Report performance of Full vs Tiled.
208  printf("filter %d - %8d us Full - %8d us Tiled\n", f,
209         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
210
211  // Compare full scaled image vs tiled image.
212  int max_diff = 0;
213  for (i = b; i < (dst_height + b); ++i) {
214    for (j = b * 4; j < (dst_width + b) * 4; ++j) {
215      int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
216                         dst_argb_opt[(i * dst_stride_argb) + j]);
217      if (abs_diff > max_diff) {
218        max_diff = abs_diff;
219      }
220    }
221  }
222
223  free_aligned_buffer_page_end(dst_argb_c);
224  free_aligned_buffer_page_end(dst_argb_opt);
225  free_aligned_buffer_page_end(src_argb);
226  return max_diff;
227}
228
229// The following adjustments in dimensions ensure the scale factor will be
230// exactly achieved.
231#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
232#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
233
234#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                     \
235  TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) {                \
236    int diff = ARGBTestFilter(                                               \
237        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
238        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
239        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,          \
240        benchmark_cpu_info_);                                                \
241    EXPECT_LE(diff, max_diff);                                               \
242  }                                                                          \
243  TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) {            \
244    int diff = ARGBClipTestFilter(                                           \
245        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
246        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
247        kFilter##filter, benchmark_iterations_);                             \
248    EXPECT_LE(diff, max_diff);                                               \
249  }
250
251// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
252// filtering is different fixed point implementations for SSSE3, Neon and C.
253#define TEST_FACTOR(name, nom, denom)         \
254  TEST_FACTOR1(name, None, nom, denom, 0)     \
255  TEST_FACTOR1(name, Linear, nom, denom, 3)   \
256  TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
257  TEST_FACTOR1(name, Box, nom, denom, 3)
258
259TEST_FACTOR(2, 1, 2)
260TEST_FACTOR(4, 1, 4)
261TEST_FACTOR(8, 1, 8)
262TEST_FACTOR(3by4, 3, 4)
263TEST_FACTOR(3by8, 3, 8)
264TEST_FACTOR(3, 1, 3)
265#undef TEST_FACTOR1
266#undef TEST_FACTOR
267#undef SX
268#undef DX
269
270#define TEST_SCALETO1(name, width, height, filter, max_diff)                   \
271  TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) {             \
272    int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width,      \
273                              height, kFilter##filter, benchmark_iterations_,  \
274                              disable_cpu_flags_, benchmark_cpu_info_);        \
275    EXPECT_LE(diff, max_diff);                                                 \
276  }                                                                            \
277  TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) {           \
278    int diff = ARGBTestFilter(width, height, Abs(benchmark_width_),            \
279                              Abs(benchmark_height_), kFilter##filter,         \
280                              benchmark_iterations_, disable_cpu_flags_,       \
281                              benchmark_cpu_info_);                            \
282    EXPECT_LE(diff, max_diff);                                                 \
283  }                                                                            \
284  TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) {         \
285    int diff =                                                                 \
286        ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
287                           kFilter##filter, benchmark_iterations_);            \
288    EXPECT_LE(diff, max_diff);                                                 \
289  }                                                                            \
290  TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) {       \
291    int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_),        \
292                                  Abs(benchmark_height_), kFilter##filter,     \
293                                  benchmark_iterations_);                      \
294    EXPECT_LE(diff, max_diff);                                                 \
295  }
296
297/// Test scale to a specified size with all 4 filters.
298#define TEST_SCALETO(name, width, height)       \
299  TEST_SCALETO1(name, width, height, None, 0)   \
300  TEST_SCALETO1(name, width, height, Linear, 3) \
301  TEST_SCALETO1(name, width, height, Bilinear, 3)
302
303TEST_SCALETO(ARGBScale, 1, 1)
304TEST_SCALETO(ARGBScale, 320, 240)
305TEST_SCALETO(ARGBScale, 352, 288)
306TEST_SCALETO(ARGBScale, 569, 480)
307TEST_SCALETO(ARGBScale, 640, 360)
308TEST_SCALETO(ARGBScale, 1280, 720)
309#undef TEST_SCALETO1
310#undef TEST_SCALETO
311
312// Scale with YUV conversion to ARGB and clipping.
313LIBYUV_API
314int YUVToARGBScaleReference2(const uint8* src_y,
315                             int src_stride_y,
316                             const uint8* src_u,
317                             int src_stride_u,
318                             const uint8* src_v,
319                             int src_stride_v,
320                             uint32 /* src_fourcc */,  // TODO: Add support.
321                             int src_width,
322                             int src_height,
323                             uint8* dst_argb,
324                             int dst_stride_argb,
325                             uint32 /* dst_fourcc */,  // TODO: Add support.
326                             int dst_width,
327                             int dst_height,
328                             int clip_x,
329                             int clip_y,
330                             int clip_width,
331                             int clip_height,
332                             enum FilterMode filtering) {
333  uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4));
334  int r;
335  I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
336             argb_buffer, src_width * 4, src_width, src_height);
337
338  r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
339                    dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
340                    clip_width, clip_height, filtering);
341  free(argb_buffer);
342  return r;
343}
344
// Fills a tightly packed width x height byte image with a ramp.
// The first pixel is |v|.  Along a row the value changes by |dx| per pixel,
// and the first pixel of each following row changes by |dy| per row.  When a
// step would leave the 0..255 range, the corresponding step direction is
// reversed and the step is undone, so the ramp bounces back instead of
// wrapping.  A reversed |dx| stays reversed for the following rows.
static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) {
  uint8* out = buf;
  int row_start = v;  // Value of the first pixel of the current row.
  for (int row = 0; row < height; ++row) {
    int value = row_start;
    for (int col = 0; col < width; ++col) {
      *out = static_cast<uint8>(value);
      ++out;
      value += dx;
      if (value < 0 || value > 255) {
        // Bounce off the range limit: reverse direction and undo the step.
        dx = -dx;
        value += dx;
      }
    }
    row_start += dy;
    if (row_start < 0 || row_start > 255) {
      dy = -dy;
      row_start += dy;
    }
  }
}
364
365// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
366static int YUVToARGBTestFilter(int src_width,
367                               int src_height,
368                               int dst_width,
369                               int dst_height,
370                               FilterMode f,
371                               int benchmark_iterations) {
372  int64 src_y_plane_size = Abs(src_width) * Abs(src_height);
373  int64 src_uv_plane_size =
374      ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
375  int src_stride_y = Abs(src_width);
376  int src_stride_uv = (Abs(src_width) + 1) / 2;
377
378  align_buffer_page_end(src_y, src_y_plane_size);
379  align_buffer_page_end(src_u, src_uv_plane_size);
380  align_buffer_page_end(src_v, src_uv_plane_size);
381
382  int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
383  int dst_stride_argb = (dst_width)*4;
384  align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
385  align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
386  if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) {
387    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
388    return 0;
389  }
390  // Fill YUV image with continuous ramp, which is less sensitive to
391  // subsampling and filtering differences for test purposes.
392  FillRamp(src_y, Abs(src_width), Abs(src_height), 128, 1, 1);
393  FillRamp(src_u, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 3, 1, 1);
394  FillRamp(src_v, (Abs(src_width) + 1) / 2, (Abs(src_height) + 1) / 2, 4, 1, 1);
395  memset(dst_argb_c, 2, dst_argb_plane_size);
396  memset(dst_argb_opt, 3, dst_argb_plane_size);
397
398  YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v,
399                           src_stride_uv, libyuv::FOURCC_I420, src_width,
400                           src_height, dst_argb_c, dst_stride_argb,
401                           libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
402                           dst_width, dst_height, f);
403
404  for (int i = 0; i < benchmark_iterations; ++i) {
405    YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v,
406                       src_stride_uv, libyuv::FOURCC_I420, src_width,
407                       src_height, dst_argb_opt, dst_stride_argb,
408                       libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
409                       dst_width, dst_height, f);
410  }
411  int max_diff = 0;
412  for (int i = 0; i < dst_height; ++i) {
413    for (int j = 0; j < dst_width * 4; ++j) {
414      int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
415                         dst_argb_opt[(i * dst_stride_argb) + j]);
416      if (abs_diff > max_diff) {
417        printf("error %d at %d,%d c %d opt %d", abs_diff, j, i,
418               dst_argb_c[(i * dst_stride_argb) + j],
419               dst_argb_opt[(i * dst_stride_argb) + j]);
420        EXPECT_LE(abs_diff, 40);
421        max_diff = abs_diff;
422      }
423    }
424  }
425
426  free_aligned_buffer_page_end(dst_argb_c);
427  free_aligned_buffer_page_end(dst_argb_opt);
428  free_aligned_buffer_page_end(src_y);
429  free_aligned_buffer_page_end(src_u);
430  free_aligned_buffer_page_end(src_v);
431  return max_diff;
432}
433
434TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
435  int diff =
436      YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
437                          benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
438                          libyuv::kFilterBilinear, benchmark_iterations_);
439  EXPECT_LE(diff, 10);
440}
441
442TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
443  int diff = YUVToARGBTestFilter(
444      benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_,
445      benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_);
446  EXPECT_LE(diff, 10);
447}
448
449}  // namespace libyuv
450