1; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2; Use of this source code is governed by a BSD-style license that can be
3; found in the LICENSE file.
4
5%include "media/base/simd/media_export.asm"
6
7  EXPORT    SYMBOL
8  align     function_align
9
10mangle(SYMBOL):
11  %assign   stack_offset 0
12  PROLOGUE  6, 7, 3, Y, U, V, ARGB, WIDTH, TABLE, TEMP
13
14  jmp       .convertend
15
16.convertloop:
17  movzx     TEMPd, BYTE [Uq]
18  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
19  add       Uq, 1
20
21  movzx     TEMPd, BYTE [Vq]
22  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
23  add       Vq, 1
24
25  movzx     TEMPd, BYTE [Yq]
26  movq      mm1, [TABLEq + 8 * TEMPq]
27
28  movzx     TEMPd, BYTE [Yq + 1]
29  movq      mm2, [TABLEq + 8 * TEMPq]
30  add       Yq, 2
31
32  ; Add UV components to Y component.
33  paddsw    mm1, mm0
34  paddsw    mm2, mm0
35
36  ; Down shift and then pack.
37  psraw     mm1, 6
38  psraw     mm2, 6
39  packuswb  mm1, mm2
40  MOVQ      [ARGBq], mm1
41  add       ARGBq, 8
42
43.convertend:
44  sub       WIDTHq, 2
45  jns       .convertloop
46
47  ; If number of pixels is odd then compute it.
48  and       WIDTHq, 1
49  jz        .convertdone
50
51  movzx     TEMPd, BYTE [Uq]
52  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
53  movzx     TEMPd, BYTE [Vq]
54  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
55  movzx     TEMPd, BYTE [Yq]
56  movq      mm1, [TABLEq + 8 * TEMPq]
57  paddsw    mm1, mm0
58  psraw     mm1, 6
59  packuswb  mm1, mm1
60  movd      [ARGBq], mm1
61
62.convertdone:
63  RET
64