; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

;
; void SYMBOL(const uint8* argb, uint8* y, uint8* u, uint8* v, int width);
;
; The main code that converts RGB pixels to YUV pixels. This function roughly
; consists of three parts: converting one ARGB pixel to YUV pixels, converting
; two ARGB pixels to YUV pixels, and converting four ARGB pixels to YUV pixels.
; Written in C, the structure of this function looks like the snippet listed
; below.
;
;   if (width & 1) {
;     --width;
;     // Convert one ARGB pixel to one Y pixel, one U pixel, and one V pixel.
;   }
;
;   if (width & 2) {
;     width -= 2;
;     // Convert two ARGB pixels to two Y pixels, one U pixel, and one V pixel.
;   }
;
;   while (width) {
;     width -= 4;
;     // Convert four ARGB pixels to four Y pixels, two U pixels, and two V
;     // pixels.
;   }
;
; Register roles, as named by the PROLOGUE invocation below:
;   ARGB  - pointer to the source pixels.
;   Y     - pointer to the output Y row.
;   U, V  - pointers to the output U and V rows. Only U is tested against NULL;
;           when it is NULL the chroma stores are skipped (in the four-pixel
;           loops that test is assembled only when SUBSAMPLING == 0). V is
;           written whenever U is written.
;   WIDTH - pixel count on entry. It is halved immediately, so afterwards it
;           counts pixel pairs: it indexes u[] and v[] directly and y[] when
;           multiplied by two.
;   TEMP  - scratch register used to move values between memory and XMM
;           registers.
;
; The row is converted from right to left: each stage lowers WIDTH and then
; converts the pixels found at the new offset, until WIDTH reaches zero.
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign stack_offset 0
  ; NOTE(review): presumably the x86inc convention (5 arguments, 6 general
  ; purpose registers, 8 XMM registers) -- confirm against the PROLOGUE macro.
  PROLOGUE 5, 6, 8, ARGB, Y, U, V, WIDTH, TEMP

  ; Initialize constants used in this function. (We use immediates to avoid
  ; dependency onto GOT.)
  LOAD_XMM  XMM_CONST_Y0, 0x00420219
  LOAD_XMM  XMM_CONST_Y1, 0x00007F00
  LOAD_XMM  XMM_CONST_U, 0x00DAB670
  LOAD_XMM  XMM_CONST_V, 0x0070A2EE
  LOAD_XMM  XMM_CONST_128, 0x00800080

.convert_one_pixel:
  ; Divide the input width by two so it represents the offsets for u[] and v[].
  ; When the width is odd, we read the rightmost ARGB pixel and convert its
  ; colorspace to YUV. This code stores one Y pixel, one U pixel, and one V
  ; pixel.
  ; The shift moves bit 0 of the width into the carry flag, so "jnc" skips this
  ; part when the width is even.
  ; NOTE(review): the C prototype declares width as int, but the full-width
  ; WIDTHq register is used here -- confirm the upper bits are well defined on
  ; 64-bit targets.
  sar       WIDTHq, 1
  jnc       .convert_two_pixels

  ; Read one ARGB (or RGB) pixel.
  READ_ARGB xmm0, 1

  ; Calculate y[0] from one RGB pixel read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       BYTE [Yq + WIDTHq * 2], TEMPb

  ; Skip calculating u and v if the output buffer is NULL, exactly as the
  ; two-pixel code below does. Without this check an odd width combined with a
  ; NULL u would store to [NULL + WIDTH].
  test      Uq, Uq
  jz        .convert_two_pixels

  ; Calculate u[0] from one RGB pixel read above. If this is an odd line, the
  ; output pixel contains the U value calculated in the previous call. We also
  ; read this pixel and calculate their average.
  ; NOTE(review): the count passed to INIT_UV is 4 although a single pixel is
  ; converted here (READ_ARGB above is given 1, and the two-pixel code passes
  ; 2) -- confirm against the INIT_UV definition.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from one RGB pixel. Same as u[0], we read the result of the
  ; previous call and get their average. The previous value must come from the
  ; V buffer (not the U buffer), as in every other V calculation in this
  ; function.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_two_pixels:
  ; If the input width is not a multiple of four, read the rightmost two ARGB
  ; pixels and convert their colorspace to YUV. This code stores two Y pixels,
  ; one U pixel, and one V pixel.
  ; WIDTH counts pixel pairs at this point, so "2 / 2" is two pixels expressed
  ; in WIDTH units. Bit 0 is known to be set when the subtraction runs, so the
  ; byte-sized "sub" cannot borrow from the upper bits.
  test      WIDTHb, 2 / 2
  jz        .convert_four_pixels
  sub       WIDTHb, 2 / 2

  ; Read two ARGB (or RGB) pixels.
  READ_ARGB xmm0, 2

  ; Calculate y[0] and y[1] from two RGB pixels read above.
  CALC_Y    xmm1, xmm0
  movd      TEMPd, xmm1
  mov       WORD [Yq + WIDTHq * 2], TEMPw

  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels

  ; Calculate u[0] from two RGB pixels read above. (For details, read the above
  ; comment in .convert_one_pixel).
  INIT_UV   TEMPd, Uq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Uq + WIDTHq], TEMPb

  ; Calculate v[0] from two RGB pixels read above.
  INIT_UV   TEMPd, Vq, 2
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       BYTE [Vq + WIDTHq], TEMPb

.convert_four_pixels:
  ; Read four ARGB pixels and convert their colorspace to YUV. This code stores
  ; four Y pixels, two U pixels, and two V pixels.
  ; WIDTH is even here (its bit 0 has been cleared above) and each loop
  ; iteration subtracts two, so the loops below stop exactly at zero.
  test      WIDTHq, WIDTHq
  jz        .convert_finish

%if PIXELSIZE == 4
  ; Check if the input buffer is aligned to a 16-byte boundary and use movdqa
  ; for reading the ARGB pixels. Testing the base pointer once is enough: the
  ; loop reads at ARGB + WIDTH * 8 with an even WIDTH, i.e. at multiples of 16
  ; bytes from the base.
  test      ARGBw, 15
  jnz       .convert_four_pixels_unaligned

.convert_four_pixels_aligned:
  ; "4 / 2" is four pixels expressed in WIDTH's pixel-pair units.
  sub       WIDTHq, 4 / 2

  ; Read four ARGB pixels. (We can use movdqa here since we have checked if the
  ; source address is aligned.) The byte offset is WIDTH pairs * 2 pixels * 4
  ; bytes per pixel.
  movdqa    xmm0, DQWORD [ARGBq + WIDTHq * 4 * 2]

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL, which means we are
  ; converting an odd line. (When we enable subsampling, these buffers must
  ; contain the u and v values for the previous call, i.e. these variables must
  ; not be NULL.)
  test      Uq, Uq
  jz        .convert_four_pixels_aligned_next
%endif

  ; Calculate u[0] and u[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from four ARGB pixels read above.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_aligned_next:
%endif

  ; Loop until the leftmost four pixels have been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_aligned

  jmp       .convert_finish
%endif

.convert_four_pixels_unaligned:
  ; Same loop as the aligned one above, except that the pixels are fetched with
  ; READ_ARGB instead of movdqa. When PIXELSIZE is not 4 this is the only
  ; four-pixel loop that gets assembled.
  sub       WIDTHq, 4 / 2

  ; Read four ARGB (or RGB) pixels.
  READ_ARGB xmm0, 4

  ; Calculate y[0], y[1], y[2], and y[3] from the input ARGB pixels.
  CALC_Y    xmm1, xmm0
  movd      DWORD [Yq + WIDTHq * 2], xmm1

%if SUBSAMPLING == 0
  ; Skip calculating u and v if the output buffer is NULL.
  test      Uq, Uq
  jz        .convert_four_pixels_unaligned_next
%endif

  ; Calculate u[0] and u[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Uq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_U, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Uq + WIDTHq], TEMPw

  ; Calculate v[0] and v[1] from the input ARGB pixels.
  INIT_UV   TEMPd, Vq, 4
  CALC_UV   xmm1, xmm0, XMM_CONST_V, TEMPd
  movd      TEMPd, xmm1
  mov       WORD [Vq + WIDTHq], TEMPw

%if SUBSAMPLING == 0
.convert_four_pixels_unaligned_next:
%endif

  ; Loop until the leftmost four pixels have been converted.
  test      WIDTHq, WIDTHq
  jnz       .convert_four_pixels_unaligned

.convert_finish:
  ; Just exit this function since this is a void function.
  RET
201