@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@
160c1bc742181ded4930842b46e9507372f0b1b963James Dong
#include "asm_common.S"

    /* require8 / preserve8 are not defined in this file; presumably they are
       macros supplied by asm_common.S that record the 8-byte stack alignment
       attributes for this object -- TODO confirm against asm_common.S. */
    require8
    preserve8

    .arm                /* assemble the code below as ARM (A32), not Thumb */
    .fpu neon           /* allow NEON mnemonics (VLD1 / VST1 / VPUSH / VPOP) */
    .text               /* place the code in the text section */
250c1bc742181ded4930842b46e9507372f0b1b963James Dong
/* Core register roles used by h264bsdWriteMacroblock.
   r0-r3 are argument/scratch registers; r4-r6 are pushed on entry and
   popped on exit by the function. Comments here must be C-style: anything
   else on a #define line would become part of the macro expansion. */
#define image   r0      /* base pointer for the descriptor field loads      */
#define data    r1      /* source pointer, advanced by each VLD1 writeback   */
#define width   r2      /* width field, then scaled (<<4) to the luma stride */
#define luma    r3      /* luma destination, stepped by width per row        */
#define cb      r4      /* cb destination, stepped by cwidth per row         */
#define cr      r5      /* cr destination, stepped by cwidth per row         */
#define cwidth  r6      /* chroma stride = luma stride >> 1                  */
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
/* -- NEON register aliases -- */

/* 128-bit views, typed as unsigned bytes. Each one holds a single 16-byte
   luma row, or (q0-q7 only) two consecutive 8-byte chroma rows. */
#define qRow0     Q0.U8
#define qRow1     Q1.U8
#define qRow2     Q2.U8
#define qRow3     Q3.U8
#define qRow4     Q4.U8
#define qRow5     Q5.U8
#define qRow6     Q6.U8
#define qRow7     Q7.U8
#define qRow8     Q8.U8
#define qRow9     Q9.U8
#define qRow10    Q10.U8
#define qRow11    Q11.U8
#define qRow12    Q12.U8
#define qRow13    Q13.U8
#define qRow14    Q14.U8
#define qRow15    Q15.U8

/* 64-bit views of the halves of Q0-Q7 (Qn overlays D(2n) and D(2n+1)).
   Each one holds a single 8-byte chroma row. */
#define dRow0     D0.U8
#define dRow1     D1.U8
#define dRow2     D2.U8
#define dRow3     D3.U8
#define dRow4     D4.U8
#define dRow5     D5.U8
#define dRow6     D6.U8
#define dRow7     D7.U8
#define dRow8     D8.U8
#define dRow9     D9.U8
#define dRow10    D10.U8
#define dRow11    D11.U8
#define dRow12    D12.U8
#define dRow13    D13.U8
#define dRow14    D14.U8
#define dRow15    D15.U8
700c1bc742181ded4930842b46e9507372f0b1b963James Dong
/*------------------------------------------------------------------------------

    Function: h264bsdWriteMacroblock

        Functional description:
            Copy one macroblock from a packed buffer into the planar
            image: sixteen 16-byte luma rows, then eight 8-byte rows for
            each chroma component. Luma and chroma stores are interleaved.

        Inputs:
            image   (r0) pointer to the image descriptor. The width field
                    and the luma, cb and cr destination pointers are read
                    from it; the descriptor itself is not modified.
            data    (r1) pointer to the macroblock data to be written:
                    256 luma bytes, followed by 64 cb bytes, followed by
                    64 cr bytes (384 bytes in total).

        Outputs:
            The memory addressed by the luma, cb and cr pointers taken
            from the image descriptor.

        Returns:
            none

        Notes:
            - The luma row stride is the width field * 16 and the chroma
              row stride is half of that, so the width field is presumably
              expressed in macroblocks (TODO confirm against the C header).
            - All luma stores carry a 128-bit alignment hint and all chroma
              stores a 64-bit one, so the destination pointers must be
              16-byte and 8-byte aligned respectively.
            - r4-r6 and q4-q7 are saved on entry and restored on exit.
            - Loads are deliberately interleaved with stores and q0-q7 are
              reloaded with chroma once their luma rows have been written.
              Do not reorder the instructions.

------------------------------------------------------------------------------*/

/* Byte offsets of the image descriptor fields read by this function.
   NOTE(review): these must match the C structure layout; confirm against
   the header that declares it. */
#define IMAGE_WIDTH_OFS   0x04
#define IMAGE_LUMA_OFS    0x0C
#define IMAGE_CB_OFS      0x10
#define IMAGE_CR_OFS      0x14

function h264bsdWriteMacroblock, export=1
    PUSH    {r4-r6,lr}                      @ r4-r6 hold cb, cr, cwidth
    VPUSH   {q4-q7}                         @ d8-d15 are callee-saved

    LDR     width, [image, #IMAGE_WIDTH_OFS]
    LDR     luma, [image, #IMAGE_LUMA_OFS]
    LDR     cb, [image, #IMAGE_CB_OFS]
    LDR     cr, [image, #IMAGE_CR_OFS]


@   Write luma
    VLD1    {qRow0, qRow1}, [data]!         @ luma rows 0,1
    LSL     width, width, #4                @ luma row stride in bytes
    VLD1    {qRow2, qRow3}, [data]!         @ luma rows 2,3
    LSR     cwidth, width, #1               @ chroma row stride in bytes
    VST1    {qRow0}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!         @ luma rows 4,5
    VST1    {qRow1}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!         @ luma rows 6,7
    VST1    {qRow2}, [luma,:128], width
    VLD1    {qRow8, qRow9}, [data]!         @ luma rows 8,9
    VST1    {qRow3}, [luma,:128], width
    VLD1    {qRow10, qRow11}, [data]!       @ luma rows 10,11
    VST1    {qRow4}, [luma,:128], width
    VLD1    {qRow12, qRow13}, [data]!       @ luma rows 12,13
    VST1    {qRow5}, [luma,:128], width
    VLD1    {qRow14, qRow15}, [data]!       @ luma rows 14,15
    VST1    {qRow6}, [luma,:128], width

@   q0-q7 are reloaded with chroma only after their luma rows are stored
    VLD1    {qRow0, qRow1}, [data]!         @ cb rows 0,1,2,3
    VST1    {qRow7}, [luma,:128], width
    VLD1    {qRow2, qRow3}, [data]!         @ cb rows 4,5,6,7
    VST1    {qRow8}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!         @ cr rows 0,1,2,3
    VST1    {qRow9}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!         @ cr rows 4,5,6,7
    VST1    {qRow10}, [luma,:128], width
    VST1    {dRow0}, [cb,:64], cwidth
    VST1    {dRow8}, [cr,:64], cwidth
    VST1    {qRow11}, [luma,:128], width
    VST1    {dRow1}, [cb,:64], cwidth
    VST1    {dRow9}, [cr,:64], cwidth
    VST1    {qRow12}, [luma,:128], width
    VST1    {dRow2}, [cb,:64], cwidth
    VST1    {dRow10}, [cr,:64], cwidth
    VST1    {qRow13}, [luma,:128], width
    VST1    {dRow3}, [cb,:64], cwidth
    VST1    {dRow11}, [cr,:64], cwidth
    VST1    {qRow14}, [luma,:128], width
    VST1    {dRow4}, [cb,:64], cwidth
    VST1    {dRow12}, [cr,:64], cwidth
    VST1    {qRow15}, [luma,:128]           @ last luma row, no writeback
    VST1    {dRow5}, [cb,:64], cwidth
    VST1    {dRow13}, [cr,:64], cwidth
    VST1    {dRow6}, [cb,:64], cwidth
    VST1    {dRow14}, [cr,:64], cwidth
    VST1    {dRow7}, [cb,:64]               @ last cb row, no writeback
    VST1    {dRow15}, [cr,:64]              @ last cr row, no writeback

    VPOP    {q4-q7}
    POP     {r4-r6,pc}                      @ restores r4-r6 and returns

    .endfunc
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong
158