1/*
2 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS. All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <stdlib.h>
12#include <time.h>
13
14#include "libyuv/cpu_id.h"
15#include "libyuv/scale.h"
16#include "../unit_test/unit_test.h"
17
18namespace libyuv {
19
20// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
21static int TestFilter(int src_width, int src_height,
22                      int dst_width, int dst_height,
23                      FilterMode f, int benchmark_iterations) {
24  int i, j;
25  const int b = 0;  // 128 to test for padding/stride.
26  int src_width_uv = (Abs(src_width) + 1) >> 1;
27  int src_height_uv = (Abs(src_height) + 1) >> 1;
28
29  int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
30  int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
31
32  int src_stride_y = b * 2 + Abs(src_width);
33  int src_stride_uv = b * 2 + src_width_uv;
34
35  align_buffer_page_end(src_y, src_y_plane_size)
36  align_buffer_page_end(src_u, src_uv_plane_size)
37  align_buffer_page_end(src_v, src_uv_plane_size)
38  srandom(time(NULL));
39  MemRandomize(src_y, src_y_plane_size);
40  MemRandomize(src_u, src_uv_plane_size);
41  MemRandomize(src_v, src_uv_plane_size);
42
43  int dst_width_uv = (dst_width + 1) >> 1;
44  int dst_height_uv = (dst_height + 1) >> 1;
45
46  int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
47  int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
48
49  int dst_stride_y = b * 2 + dst_width;
50  int dst_stride_uv = b * 2 + dst_width_uv;
51
52  align_buffer_page_end(dst_y_c, dst_y_plane_size)
53  align_buffer_page_end(dst_u_c, dst_uv_plane_size)
54  align_buffer_page_end(dst_v_c, dst_uv_plane_size)
55  align_buffer_page_end(dst_y_opt, dst_y_plane_size)
56  align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
57  align_buffer_page_end(dst_v_opt, dst_uv_plane_size)
58
59
60  MaskCpuFlags(0);  // Disable all CPU optimization.
61  double c_time = get_time();
62  I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
63            src_u + (src_stride_uv * b) + b, src_stride_uv,
64            src_v + (src_stride_uv * b) + b, src_stride_uv,
65            src_width, src_height,
66            dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
67            dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
68            dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv,
69            dst_width, dst_height, f);
70  c_time = (get_time() - c_time);
71
72  MaskCpuFlags(-1);  // Enable all CPU optimization.
73  double opt_time = get_time();
74  for (i = 0; i < benchmark_iterations; ++i) {
75    I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
76              src_u + (src_stride_uv * b) + b, src_stride_uv,
77              src_v + (src_stride_uv * b) + b, src_stride_uv,
78              src_width, src_height,
79              dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
80              dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
81              dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv,
82              dst_width, dst_height, f);
83  }
84  opt_time = (get_time() - opt_time) / benchmark_iterations;
85  // Report performance of C vs OPT
86  printf("filter %d - %8d us C - %8d us OPT\n",
87         f,
88         static_cast<int>(c_time * 1e6),
89         static_cast<int>(opt_time * 1e6));
90
91  // C version may be a little off from the optimized. Order of
92  //  operations may introduce rounding somewhere. So do a difference
93  //  of the buffers and look to see that the max difference isn't
94  //  over 2.
95  int max_diff = 0;
96  for (i = b; i < (dst_height + b); ++i) {
97    for (j = b; j < (dst_width + b); ++j) {
98      int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
99                         dst_y_opt[(i * dst_stride_y) + j]);
100      if (abs_diff > max_diff) {
101        max_diff = abs_diff;
102      }
103    }
104  }
105
106  for (i = b; i < (dst_height_uv + b); ++i) {
107    for (j = b; j < (dst_width_uv + b); ++j) {
108      int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
109                         dst_u_opt[(i * dst_stride_uv) + j]);
110      if (abs_diff > max_diff) {
111        max_diff = abs_diff;
112      }
113      abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
114                     dst_v_opt[(i * dst_stride_uv) + j]);
115      if (abs_diff > max_diff) {
116        max_diff = abs_diff;
117      }
118    }
119  }
120
121  free_aligned_buffer_page_end(dst_y_c)
122  free_aligned_buffer_page_end(dst_u_c)
123  free_aligned_buffer_page_end(dst_v_c)
124  free_aligned_buffer_page_end(dst_y_opt)
125  free_aligned_buffer_page_end(dst_u_opt)
126  free_aligned_buffer_page_end(dst_v_opt)
127
128  free_aligned_buffer_page_end(src_y)
129  free_aligned_buffer_page_end(src_u)
130  free_aligned_buffer_page_end(src_v)
131
132  return max_diff;
133}
134
135// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
136// 0 = exact.
137static int TestFilter_16(int src_width, int src_height,
138                         int dst_width, int dst_height,
139                         FilterMode f, int benchmark_iterations) {
140  int i, j;
141  const int b = 0;  // 128 to test for padding/stride.
142  int src_width_uv = (Abs(src_width) + 1) >> 1;
143  int src_height_uv = (Abs(src_height) + 1) >> 1;
144
145  int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
146  int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
147
148  int src_stride_y = b * 2 + Abs(src_width);
149  int src_stride_uv = b * 2 + src_width_uv;
150
151  align_buffer_page_end(src_y, src_y_plane_size)
152  align_buffer_page_end(src_u, src_uv_plane_size)
153  align_buffer_page_end(src_v, src_uv_plane_size)
154  align_buffer_page_end(src_y_16, src_y_plane_size * 2)
155  align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
156  align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
157  uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
158  uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
159  uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
160
161  srandom(time(NULL));
162  MemRandomize(src_y, src_y_plane_size);
163  MemRandomize(src_u, src_uv_plane_size);
164  MemRandomize(src_v, src_uv_plane_size);
165
166  for (i = b; i < src_height + b; ++i) {
167    for (j = b; j < src_width + b; ++j) {
168      p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
169    }
170  }
171
172  for (i = b; i < (src_height_uv + b); ++i) {
173    for (j = b; j < (src_width_uv + b); ++j) {
174      p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
175      p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
176    }
177  }
178
179  int dst_width_uv = (dst_width + 1) >> 1;
180  int dst_height_uv = (dst_height + 1) >> 1;
181
182  int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
183  int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
184
185  int dst_stride_y = b * 2 + dst_width;
186  int dst_stride_uv = b * 2 + dst_width_uv;
187
188  align_buffer_page_end(dst_y_8, dst_y_plane_size)
189  align_buffer_page_end(dst_u_8, dst_uv_plane_size)
190  align_buffer_page_end(dst_v_8, dst_uv_plane_size)
191  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
192  align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
193  align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
194
195  uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
196  uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
197  uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
198
199  I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
200            src_u + (src_stride_uv * b) + b, src_stride_uv,
201            src_v + (src_stride_uv * b) + b, src_stride_uv,
202            src_width, src_height,
203            dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
204            dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
205            dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
206            dst_width, dst_height, f);
207
208  for (i = 0; i < benchmark_iterations; ++i) {
209    I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
210                 p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
211                 p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
212                 src_width, src_height,
213                 p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
214                 p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
215                 p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
216                 dst_width, dst_height, f);
217  }
218
219  // Expect an exact match
220  int max_diff = 0;
221  for (i = b; i < (dst_height + b); ++i) {
222    for (j = b; j < (dst_width + b); ++j) {
223      int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
224                         p_dst_y_16[(i * dst_stride_y) + j]);
225      if (abs_diff > max_diff) {
226        max_diff = abs_diff;
227      }
228    }
229  }
230
231  for (i = b; i < (dst_height_uv + b); ++i) {
232    for (j = b; j < (dst_width_uv + b); ++j) {
233      int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
234                         p_dst_u_16[(i * dst_stride_uv) + j]);
235      if (abs_diff > max_diff) {
236        max_diff = abs_diff;
237      }
238      abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
239                     p_dst_v_16[(i * dst_stride_uv) + j]);
240      if (abs_diff > max_diff) {
241        max_diff = abs_diff;
242      }
243    }
244  }
245
246  free_aligned_buffer_page_end(dst_y_8)
247  free_aligned_buffer_page_end(dst_u_8)
248  free_aligned_buffer_page_end(dst_v_8)
249  free_aligned_buffer_page_end(dst_y_16)
250  free_aligned_buffer_page_end(dst_u_16)
251  free_aligned_buffer_page_end(dst_v_16)
252
253  free_aligned_buffer_page_end(src_y)
254  free_aligned_buffer_page_end(src_u)
255  free_aligned_buffer_page_end(src_v)
256  free_aligned_buffer_page_end(src_y_16)
257  free_aligned_buffer_page_end(src_u_16)
258  free_aligned_buffer_page_end(src_v_16)
259
260  return max_diff;
261}
262
// Defines two tests for one scale factor and one filter: an 8-bit test built
// on TestFilter and a "_16" test built on TestFilter_16. Each scales from the
// benchmark size to that size multiplied by hfactor / vfactor and expects the
// returned difference to be at most max_diff.
//
// hfactor and vfactor are pasted unparenthesized after "Abs(dimension) *", so
// an argument written as 3 / 4 evaluates as (Abs(dimension) * 3) / 4 in
// integer arithmetic.
#define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff)                 \
  TEST_F(libyuvTest, ScaleDownBy##name##_##filter) {                           \
    const int scaled_width = Abs(benchmark_width_) * hfactor;                  \
    const int scaled_height = Abs(benchmark_height_) * vfactor;                \
    int diff = TestFilter(benchmark_width_, benchmark_height_,                 \
                          scaled_width, scaled_height,                         \
                          kFilter##filter, benchmark_iterations_);             \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(libyuvTest, ScaleDownBy##name##_##filter##_16) {                      \
    const int scaled_width = Abs(benchmark_width_) * hfactor;                  \
    const int scaled_height = Abs(benchmark_height_) * vfactor;                \
    int diff = TestFilter_16(benchmark_width_, benchmark_height_,              \
                             scaled_width, scaled_height,                      \
                             kFilter##filter, benchmark_iterations_);          \
    EXPECT_LE(diff, max_diff);                                                 \
  }
278
279// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
280// filtering is different fixed point implementations for SSSE3, Neon and C.
281#define TEST_FACTOR(name, hfactor, vfactor)                                    \
282    TEST_FACTOR1(name, None, hfactor, vfactor, 0)                              \
283    TEST_FACTOR1(name, Linear, hfactor, vfactor, 3)                            \
284    TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 3)                          \
285    TEST_FACTOR1(name, Box, hfactor, vfactor, 3)                               \
286
287TEST_FACTOR(2, 1 / 2, 1 / 2)
288TEST_FACTOR(4, 1 / 4, 1 / 4)
289TEST_FACTOR(8, 1 / 8, 1 / 8)
290TEST_FACTOR(3by4, 3 / 4, 3 / 4)
291#undef TEST_FACTOR1
292#undef TEST_FACTOR
293
// Defines four tests for one fixed size and one filter:
//   <name>To<width>x<height>_<filter>        benchmark size -> width x height
//   <name>From<width>x<height>_<filter>      width x height -> benchmark size
//   ..._16 variants of both, built on TestFilter_16 instead of TestFilter.
// Each test expects the returned difference to be at most max_diff.
#define TEST_SCALETO1(name, width, height, filter, max_diff)                   \
  TEST_F(libyuvTest, name##To##width##x##height##_##filter) {                  \
    int diff = TestFilter(benchmark_width_, benchmark_height_,                 \
                          width, height,                                       \
                          kFilter##filter, benchmark_iterations_);             \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(libyuvTest, name##From##width##x##height##_##filter) {                \
    int diff = TestFilter(width, height,                                       \
                          Abs(benchmark_width_), Abs(benchmark_height_),       \
                          kFilter##filter, benchmark_iterations_);             \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(libyuvTest, name##To##width##x##height##_##filter##_16) {             \
    int diff = TestFilter_16(benchmark_width_, benchmark_height_,              \
                             width, height,                                    \
                             kFilter##filter, benchmark_iterations_);          \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(libyuvTest, name##From##width##x##height##_##filter##_16) {           \
    int diff = TestFilter_16(width, height,                                    \
                             Abs(benchmark_width_), Abs(benchmark_height_),    \
                             kFilter##filter, benchmark_iterations_);          \
    EXPECT_LE(diff, max_diff);                                                 \
  }
319
320// Test scale to a specified size with all 4 filters.
321#define TEST_SCALETO(name, width, height)                                      \
322    TEST_SCALETO1(name, width, height, None, 0)                                \
323    TEST_SCALETO1(name, width, height, Linear, 3)                              \
324    TEST_SCALETO1(name, width, height, Bilinear, 3)                            \
325    TEST_SCALETO1(name, width, height, Box, 3)
326
327TEST_SCALETO(Scale, 1, 1)
328TEST_SCALETO(Scale, 320, 240)
329TEST_SCALETO(Scale, 352, 288)
330TEST_SCALETO(Scale, 569, 480)
331TEST_SCALETO(Scale, 640, 360)
332TEST_SCALETO(Scale, 1280, 720)
333#undef TEST_SCALETO1
334#undef TEST_SCALETO
335
336}  // namespace libyuv
337