@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@

/*
 * Syntax / target: GNU as, ARM (A32) state, NEON, run through the C
 * preprocessor (register aliases below are cpp macros).
 * require8 / preserve8 / function are not defined in this file; they are
 * presumably macros supplied by asm_common.S.
 */

#include "asm_common.S"

    require8
    preserve8

    .arm
    .fpu neon
    .text

/* Input / output registers */
#define image   r0      /* in:  pointer to the image descriptor            */
#define data    r1      /* in:  pointer to the macroblock samples          */
#define width   r2      /* tmp: luma row stride in bytes (after LSL #4)    */
#define luma    r3      /* tmp: running luma destination pointer           */
#define cb      r4      /* tmp: running cb destination pointer             */
#define cr      r5      /* tmp: running cr destination pointer             */
#define cwidth  r6      /* tmp: chroma row stride in bytes (width / 2)     */

/*
 * Byte offsets of the fields read from the structure addressed by image.
 * The names follow the registers the fields are loaded into.
 * NOTE(review): confirm against the C declaration of the image structure.
 */
#define IMAGE_WIDTH_OFS 0x4
#define IMAGE_LUMA_OFS  0xC
#define IMAGE_CB_OFS    0x10
#define IMAGE_CR_OFS    0x14

/* -- NEON registers -- */

#define qRow0   Q0.U8
#define qRow1   Q1.U8
#define qRow2   Q2.U8
#define qRow3   Q3.U8
#define qRow4   Q4.U8
#define qRow5   Q5.U8
#define qRow6   Q6.U8
#define qRow7   Q7.U8
#define qRow8   Q8.U8
#define qRow9   Q9.U8
#define qRow10  Q10.U8
#define qRow11  Q11.U8
#define qRow12  Q12.U8
#define qRow13  Q13.U8
#define qRow14  Q14.U8
#define qRow15  Q15.U8

#define dRow0   D0.U8
#define dRow1   D1.U8
#define dRow2   D2.U8
#define dRow3   D3.U8
#define dRow4   D4.U8
#define dRow5   D5.U8
#define dRow6   D6.U8
#define dRow7   D7.U8
#define dRow8   D8.U8
#define dRow9   D9.U8
#define dRow10  D10.U8
#define dRow11  D11.U8
#define dRow12  D12.U8
#define dRow13  D13.U8
#define dRow14  D14.U8
#define dRow15  D15.U8

/*------------------------------------------------------------------------------

    Function: h264bsdWriteMacroblock

        Functional description:
            Write one macroblock into the image. Both luma and chroma
            components will be written at the same time.

        Inputs:
            data    pointer to macroblock data to be written, 256 values for
                    luma followed by 64 values for both chroma components

        Outputs:
            image   pointer to the image where the macroblock will be written

        Returns:
            none

        Register usage:
            In:       r0 = image, r1 = data
            Clobbers: r1-r3, q0-q3, q8-q15
            Saved and restored here: r4-r6, lr, q4-q7
            Stack:    16 bytes (PUSH) + 64 bytes (VPUSH)

        Notes:
            - The structure fields read through image are only loaded; the
              advanced destination pointers are never stored back.
            - Every store carries an alignment qualifier (:128 for luma rows,
              :64 for chroma rows), so the destination pointers and the row
              strides must keep those alignments.
            - The field at IMAGE_WIDTH_OFS is multiplied by 16 to form the
              luma row stride; presumably it is a width in macroblock units
              (TODO confirm against the caller).
            - Loads and stores are interleaved on purpose; keep the order.

------------------------------------------------------------------------------*/

function h264bsdWriteMacroblock, export=1
    PUSH    {r4-r6,lr}
    VPUSH   {q4-q7}                         @ q4-q7 are reused for sample rows below

    LDR     width, [image, #IMAGE_WIDTH_OFS]
    LDR     luma, [image, #IMAGE_LUMA_OFS]
    LDR     cb, [image, #IMAGE_CB_OFS]
    LDR     cr, [image, #IMAGE_CR_OFS]


@   Write luma: 16 rows of 16 bytes, source rows are contiguous in data
    VLD1    {qRow0, qRow1}, [data]!         @ luma rows 0,1
    LSL     width, width, #4                @ luma stride = 16 * loaded width
    VLD1    {qRow2, qRow3}, [data]!         @ luma rows 2,3
    LSR     cwidth, width, #1               @ chroma stride = luma stride / 2
    VST1    {qRow0}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!         @ luma rows 4,5
    VST1    {qRow1}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!         @ luma rows 6,7
    VST1    {qRow2}, [luma,:128], width
    VLD1    {qRow8, qRow9}, [data]!         @ luma rows 8,9
    VST1    {qRow3}, [luma,:128], width
    VLD1    {qRow10, qRow11}, [data]!       @ luma rows 10,11
    VST1    {qRow4}, [luma,:128], width
    VLD1    {qRow12, qRow13}, [data]!       @ luma rows 12,13
    VST1    {qRow5}, [luma,:128], width
    VLD1    {qRow14, qRow15}, [data]!       @ luma rows 14,15
    VST1    {qRow6}, [luma,:128], width

@   Q0-Q7 are reloaded with chroma only after the luma row each one held
@   has been stored. Chroma rows are 8 bytes, so each D register is one row:
@   D0-D7 (Q0-Q3) = cb rows 0-7, D8-D15 (Q4-Q7) = cr rows 0-7.
    VLD1    {qRow0, qRow1}, [data]!         @ cb rows 0,1,2,3
    VST1    {qRow7}, [luma,:128], width
    VLD1    {qRow2, qRow3}, [data]!         @ cb rows 4,5,6,7
    VST1    {qRow8}, [luma,:128], width
    VLD1    {qRow4, qRow5}, [data]!         @ cr rows 0,1,2,3
    VST1    {qRow9}, [luma,:128], width
    VLD1    {qRow6, qRow7}, [data]!         @ cr rows 4,5,6,7
    VST1    {qRow10}, [luma,:128], width
    VST1    {dRow0}, [cb,:64], cwidth
    VST1    {dRow8}, [cr,:64], cwidth
    VST1    {qRow11}, [luma,:128], width
    VST1    {dRow1}, [cb,:64], cwidth
    VST1    {dRow9}, [cr,:64], cwidth
    VST1    {qRow12}, [luma,:128], width
    VST1    {dRow2}, [cb,:64], cwidth
    VST1    {dRow10}, [cr,:64], cwidth
    VST1    {qRow13}, [luma,:128], width
    VST1    {dRow3}, [cb,:64], cwidth
    VST1    {dRow11}, [cr,:64], cwidth
    VST1    {qRow14}, [luma,:128], width
    VST1    {dRow4}, [cb,:64], cwidth
    VST1    {dRow12}, [cr,:64], cwidth
    VST1    {qRow15}, [luma,:128]           @ last luma row: no post-increment needed
    VST1    {dRow5}, [cb,:64], cwidth
    VST1    {dRow13}, [cr,:64], cwidth
    VST1    {dRow6}, [cb,:64], cwidth
    VST1    {dRow14}, [cr,:64], cwidth
    VST1    {dRow7}, [cb,:64]               @ last chroma rows: no post-increment needed
    VST1    {dRow15}, [cr,:64]

    VPOP    {q4-q7}
    POP     {r4-r6,pc}                      @ return by popping the saved lr into pc

    .endfunc
