;
; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
16
17    REQUIRE8
18    PRESERVE8
19
20    AREA    |.text|, CODE
21
22    EXPORT h264bsdWriteMacroblock
23
24; Input / output registers
25image   RN  0
26data    RN  1
27width   RN  2
28luma    RN  3
29cb      RN  4
30cr      RN  5
31cwidth  RN  6
32
33; -- NEON registers --
34
35qRow0   QN  Q0.U8
36qRow1   QN  Q1.U8
37qRow2   QN  Q2.U8
38qRow3   QN  Q3.U8
39qRow4   QN  Q4.U8
40qRow5   QN  Q5.U8
41qRow6   QN  Q6.U8
42qRow7   QN  Q7.U8
43qRow8   QN  Q8.U8
44qRow9   QN  Q9.U8
45qRow10  QN  Q10.U8
46qRow11  QN  Q11.U8
47qRow12  QN  Q12.U8
48qRow13  QN  Q13.U8
49qRow14  QN  Q14.U8
50qRow15  QN  Q15.U8
51
52dRow0   DN  D0.U8
53dRow1   DN  D1.U8
54dRow2   DN  D2.U8
55dRow3   DN  D3.U8
56dRow4   DN  D4.U8
57dRow5   DN  D5.U8
58dRow6   DN  D6.U8
59dRow7   DN  D7.U8
60dRow8   DN  D8.U8
61dRow9   DN  D9.U8
62dRow10  DN  D10.U8
63dRow11  DN  D11.U8
64dRow12  DN  D12.U8
65dRow13  DN  D13.U8
66dRow14  DN  D14.U8
67dRow15  DN  D15.U8
68
;/*------------------------------------------------------------------------------
;
;    Function: h264bsdWriteMacroblock
;
;        Functional description:
;            Write one macroblock into the image. Both luma and chroma
;            components will be written at the same time: the loads from
;            data are interleaved with the stores to the three planes, and
;            q0-q7 are reused for chroma once their luma rows are stored.
;
;        Inputs:
;            image   (r0) pointer to the image structure; the width field
;                    and the luma, cb and cr destination pointers are read
;                    from it at the IMAGE_*_OFFSET offsets defined below
;            data    (r1) pointer to macroblock data to be written, 256 values
;                    for luma followed by 64 values for both chroma components
;                    (cb first, then cr)
;
;        Outputs:
;            image   pointer to the image where the macroblock will be written;
;                    16 luma rows of 16 bytes and 8 rows of 8 bytes for each
;                    of cb and cr are stored through the pointers read from it
;
;        Returns:
;            none
;
;        Registers:
;            r1-r3, q0-q3 and q8-q15 are modified. r4-r6 and q4-q7 are
;            pushed on entry and restored on exit.
;
;        Alignment:
;            The luma stores carry a 128-bit alignment qualifier and the
;            chroma stores a 64-bit one, so the luma pointer must be 16-byte
;            aligned and the cb/cr pointers 8-byte aligned. The row steps
;            (width*16 and width*8) keep every following row aligned.
;
;------------------------------------------------------------------------------*/

; Byte offsets of the fields read from the structure that image points to.
IMAGE_WIDTH_OFFSET  EQU     0x04
IMAGE_LUMA_OFFSET   EQU     0x0C
IMAGE_CB_OFFSET     EQU     0x10
IMAGE_CR_OFFSET     EQU     0x14

h264bsdWriteMacroblock
    PUSH    {r4-r6,lr}                  ; r4-r6 hold cb, cr and cwidth below
    VPUSH   {q4-q7}                     ; d8-d15 are callee-saved

    LDR     width, [image, #IMAGE_WIDTH_OFFSET]
    LDR     luma, [image, #IMAGE_LUMA_OFFSET]
    LDR     cb, [image, #IMAGE_CB_OFFSET]
    LDR     cr, [image, #IMAGE_CR_OFFSET]


;   Write luma
;   Each VLD1 fetches two 16-byte rows and advances data by 32 bytes; each
;   luma VST1 writes one row and advances luma by the row step in width.
    VLD1    {qRow0, qRow1}, [data]!
    LSL     width, width, #4            ; luma row step = width * 16 bytes
                                        ; (presumably width counts 16-pixel
                                        ; macroblocks - confirm with caller)
    VLD1    {qRow2, qRow3}, [data]!
    LSR     cwidth, width, #1           ; chroma row step = half the luma step
    VST1    {qRow0}, [luma@128], width
    VLD1    {qRow4, qRow5}, [data]!
    VST1    {qRow1}, [luma@128], width
    VLD1    {qRow6, qRow7}, [data]!
    VST1    {qRow2}, [luma@128], width
    VLD1    {qRow8, qRow9}, [data]!
    VST1    {qRow3}, [luma@128], width
    VLD1    {qRow10, qRow11}, [data]!
    VST1    {qRow4}, [luma@128], width
    VLD1    {qRow12, qRow13}, [data]!
    VST1    {qRow5}, [luma@128], width
    VLD1    {qRow14, qRow15}, [data]!
    VST1    {qRow6}, [luma@128], width

;   Luma rows 0-7 are stored (or stored just before the register is reloaded),
;   so q0-q7 are refilled with chroma. Afterwards dRow0-dRow7 hold the eight
;   cb rows and dRow8-dRow15 the eight cr rows; q8-q15 still hold luma rows
;   8-15.
    VLD1    {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3
    VST1    {qRow7}, [luma@128], width
    VLD1    {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7
    VST1    {qRow8}, [luma@128], width
    VLD1    {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3
    VST1    {qRow9}, [luma@128], width
    VLD1    {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7
    VST1    {qRow10}, [luma@128], width

;   Remaining luma rows interleaved with the cb/cr rows
    VST1    {dRow0}, [cb@64], cwidth
    VST1    {dRow8}, [cr@64], cwidth
    VST1    {qRow11}, [luma@128], width
    VST1    {dRow1}, [cb@64], cwidth
    VST1    {dRow9}, [cr@64], cwidth
    VST1    {qRow12}, [luma@128], width
    VST1    {dRow2}, [cb@64], cwidth
    VST1    {dRow10}, [cr@64], cwidth
    VST1    {qRow13}, [luma@128], width
    VST1    {dRow3}, [cb@64], cwidth
    VST1    {dRow11}, [cr@64], cwidth
    VST1    {qRow14}, [luma@128], width
    VST1    {dRow4}, [cb@64], cwidth
    VST1    {dRow12}, [cr@64], cwidth
    VST1    {qRow15}, [luma]            ; last luma row: no pointer update
    VST1    {dRow5}, [cb@64], cwidth
    VST1    {dRow13}, [cr@64], cwidth
    VST1    {dRow6}, [cb@64], cwidth
    VST1    {dRow14}, [cr@64], cwidth
    VST1    {dRow7}, [cb@64]            ; last chroma rows: no pointer update
    VST1    {dRow15}, [cr@64]

    VPOP    {q4-q7}
    POP     {r4-r6,pc}                  ; restore r4-r6 and return
    END
151
152
153