1@
2@ Copyright (C) 2009 The Android Open Source Project
3@
4@ Licensed under the Apache License, Version 2.0 (the "License");
5@ you may not use this file except in compliance with the License.
6@ You may obtain a copy of the License at
7@
8@      http://www.apache.org/licenses/LICENSE-2.0
9@
10@ Unless required by applicable law or agreed to in writing, software
11@ distributed under the License is distributed on an "AS IS" BASIS,
12@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13@ See the License for the specific language governing permissions and
14@ limitations under the License.
15@
16
17#include "asm_common.S"
18
/*
 * REQUIRE8 / PRESERVE8 are not defined in this file; presumably they come
 * from asm_common.S and mirror the armasm directives of the same name
 * (8-byte stack alignment required / preserved) -- TODO confirm.
 */
    REQUIRE8
    PRESERVE8

    .arm                /* assemble as ARM (A32) instructions */
    .fpu neon           /* allow NEON mnemonics */
    .text

/* -- Core registers -- */

/* Incoming arguments */
#define image   r0
#define data    r1

/* Working registers, filled in by the function body */
#define width   r2
#define luma    r3
#define cb      r4
#define cr      r5
#define cwidth  r6

/* -- NEON registers -- */

/* 128-bit (quadword) views: qRowN is hardware register qN */
#define qRow0     q0
#define qRow1     q1
#define qRow2     q2
#define qRow3     q3
#define qRow4     q4
#define qRow5     q5
#define qRow6     q6
#define qRow7     q7
#define qRow8     q8
#define qRow9     q9
#define qRow10    q10
#define qRow11    q11
#define qRow12    q12
#define qRow13    q13
#define qRow14    q14
#define qRow15    q15

/*
 * 64-bit (doubleword) views: dRowN is hardware register dN.
 * The D registers overlay the Q registers, so dRow(2n) and dRow(2n+1) are
 * the low and high halves of qRow(n). dRow0-dRow15 therefore cover
 * qRow0-qRow7 only; dRow8 is the low half of qRow4, not of qRow8.
 */
#define dRow0     d0
#define dRow1     d1
#define dRow2     d2
#define dRow3     d3
#define dRow4     d4
#define dRow5     d5
#define dRow6     d6
#define dRow7     d7
#define dRow8     d8
#define dRow9     d9
#define dRow10    d10
#define dRow11    d11
#define dRow12    d12
#define dRow13    d13
#define dRow14    d14
#define dRow15    d15
70
/*------------------------------------------------------------------------------

    Function: h264bsdWriteMacroblock

        Functional description:
            Write one macroblock into the image. The 16x16 luma block and
            both 8x8 chroma blocks are written in one straight-line pass:
            loads from 'data' are interleaved with stores to the image, so
            all 384 bytes are moved without a loop.

        Inputs:
            image   (r0) pointer to the image descriptor. Four words are
                    read from it:
                        +0x04  width; multiplied by 16 to form the luma
                               row stride in bytes (the chroma row stride
                               is half of that)
                        +0x0C  destination pointer for luma
                        +0x10  destination pointer for cb
                        +0x14  destination pointer for cr
            data    (r1) pointer to macroblock data to be written: 256
                    bytes of luma (16 rows of 16), followed by 64 bytes of
                    cb (8 rows of 8), followed by 64 bytes of cr (8 rows
                    of 8)

        Outputs:
            The memory addressed by the luma, cb and cr pointers read from
            'image' receives the macroblock.

        Returns:
            none

        Alignment:
            Every luma destination row must be 16-byte aligned and every
            chroma destination row 8-byte aligned (the stores carry :128
            and :64 alignment qualifiers). No alignment is demanded of
            'data'.

        Registers:
            r4-r6 and q4-q7 are saved on entry and restored on exit.
            r1-r3, q0-q3 and q8-q15 are overwritten. The condition flags
            are not modified.

------------------------------------------------------------------------------*/

/* Byte offsets of the image descriptor words read by this function */
#define IMAGE_WIDTH_OFFSET  0x04
#define IMAGE_LUMA_OFFSET   0x0C
#define IMAGE_CB_OFFSET     0x10
#define IMAGE_CR_OFFSET     0x14

function h264bsdWriteMacroblock, export=1
    PUSH    {r4-r6,lr}                  @ r4-r6 become cb, cr, cwidth
    VPUSH   {q4-q7}                     @ q4-q7 must be intact on return

    LDR     width, [image, #IMAGE_WIDTH_OFFSET]
    LDR     luma, [image, #IMAGE_LUMA_OFFSET]
    LDR     cb, [image, #IMAGE_CB_OFFSET]
    LDR     cr, [image, #IMAGE_CR_OFFSET]

@   Luma: fetch the 16 rows into q0-q15, storing rows as soon as they are
@   available. Each store advances the luma pointer by one row stride.
    VLD1.8  {qRow0, qRow1}, [data]!
    LSL     width, width, #4            @ luma row stride = width * 16
    VLD1.8  {qRow2, qRow3}, [data]!
    LSR     cwidth, width, #1           @ chroma row stride = luma stride / 2
    VST1.8  {qRow0}, [luma,:128], width
    VLD1.8  {qRow4, qRow5}, [data]!
    VST1.8  {qRow1}, [luma,:128], width
    VLD1.8  {qRow6, qRow7}, [data]!
    VST1.8  {qRow2}, [luma,:128], width
    VLD1.8  {qRow8, qRow9}, [data]!
    VST1.8  {qRow3}, [luma,:128], width
    VLD1.8  {qRow10, qRow11}, [data]!
    VST1.8  {qRow4}, [luma,:128], width
    VLD1.8  {qRow12, qRow13}, [data]!
    VST1.8  {qRow5}, [luma,:128], width
    VLD1.8  {qRow14, qRow15}, [data]!
    VST1.8  {qRow6}, [luma,:128], width

@   Chroma: each of q0-q7 is reloaded only after its luma row has been
@   stored. Afterwards d0-d7 hold the cb rows and d8-d15 hold the cr rows.
    VLD1.8  {qRow0, qRow1}, [data]!     @ cb rows 0,1,2,3
    VST1.8  {qRow7}, [luma,:128], width
    VLD1.8  {qRow2, qRow3}, [data]!     @ cb rows 4,5,6,7
    VST1.8  {qRow8}, [luma,:128], width
    VLD1.8  {qRow4, qRow5}, [data]!     @ cr rows 0,1,2,3
    VST1.8  {qRow9}, [luma,:128], width
    VLD1.8  {qRow6, qRow7}, [data]!     @ cr rows 4,5,6,7
    VST1.8  {qRow10}, [luma,:128], width

@   Remaining luma rows 11-15 interleaved with the chroma rows
    VST1.8  {dRow0}, [cb,:64], cwidth
    VST1.8  {dRow8}, [cr,:64], cwidth
    VST1.8  {qRow11}, [luma,:128], width
    VST1.8  {dRow1}, [cb,:64], cwidth
    VST1.8  {dRow9}, [cr,:64], cwidth
    VST1.8  {qRow12}, [luma,:128], width
    VST1.8  {dRow2}, [cb,:64], cwidth
    VST1.8  {dRow10}, [cr,:64], cwidth
    VST1.8  {qRow13}, [luma,:128], width
    VST1.8  {dRow3}, [cb,:64], cwidth
    VST1.8  {dRow11}, [cr,:64], cwidth
    VST1.8  {qRow14}, [luma,:128], width
    VST1.8  {dRow4}, [cb,:64], cwidth
    VST1.8  {dRow12}, [cr,:64], cwidth
    VST1.8  {qRow15}, [luma,:128]       @ last luma row, no pointer update
    VST1.8  {dRow5}, [cb,:64], cwidth
    VST1.8  {dRow13}, [cr,:64], cwidth
    VST1.8  {dRow6}, [cb,:64], cwidth
    VST1.8  {dRow14}, [cr,:64], cwidth
    VST1.8  {dRow7}, [cb,:64]           @ last chroma rows, no pointer update
    VST1.8  {dRow15}, [cr,:64]

    VPOP    {q4-q7}
    POP     {r4-r6,pc}                  @ return by popping the saved lr into pc
153
154
155
156
157