@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@

#include "asm_common.S"

    require8
    preserve8

    .arm
    .fpu neon
    .text

/* Core register roles.
   r0 and r1 arrive as arguments, r2 and r3 are scratch, r4-r6 are saved
   and restored by the function itself. */
#define image   r0
#define data    r1
#define width   r2
#define luma    r3
#define cb      r4
#define cr      r5
#define cwidth  r6

/* Byte offsets of the fields read from the structure addressed by r0.
   Each constant is named after the register alias its field is loaded into. */
#define IMAGE_WIDTH_OFFSET  4
#define IMAGE_LUMA_OFFSET   0xC
#define IMAGE_CB_OFFSET     0x10
#define IMAGE_CR_OFFSET     0x14

/* NEON quadword views: one 16-byte luma row each. */
#define qRow0   Q0.U8
#define qRow1   Q1.U8
#define qRow2   Q2.U8
#define qRow3   Q3.U8
#define qRow4   Q4.U8
#define qRow5   Q5.U8
#define qRow6   Q6.U8
#define qRow7   Q7.U8
#define qRow8   Q8.U8
#define qRow9   Q9.U8
#define qRow10  Q10.U8
#define qRow11  Q11.U8
#define qRow12  Q12.U8
#define qRow13  Q13.U8
#define qRow14  Q14.U8
#define qRow15  Q15.U8

/* NEON doubleword views: one 8-byte chroma row each. */
#define dRow0   D0.U8
#define dRow1   D1.U8
#define dRow2   D2.U8
#define dRow3   D3.U8
#define dRow4   D4.U8
#define dRow5   D5.U8
#define dRow6   D6.U8
#define dRow7   D7.U8
#define dRow8   D8.U8
#define dRow9   D9.U8
#define dRow10  D10.U8
#define dRow11  D11.U8
#define dRow12  D12.U8
#define dRow13  D13.U8
#define dRow14  D14.U8
#define dRow15  D15.U8

/*------------------------------------------------------------------------------

    Function: h264bsdWriteMacroblock

        Functional description:
            Copy one macroblock from a packed source buffer into the three
            planes of an image. Luma and both chroma components are written
            in the same pass, with the chroma stores interleaved between the
            last luma stores.

        Inputs:
            data    (r1) pointer to the packed macroblock: 256 luma bytes
                    (16 rows of 16), then 64 Cb bytes (8 rows of 8), then
                    64 Cr bytes (8 rows of 8)

        Outputs:
            image   (r0) pointer to the image descriptor. The function reads
                    the word at offset 4 (scaled by 16 to form the luma row
                    stride, so presumably the picture width in macroblocks)
                    and the luma, cb and cr destination pointers at offsets
                    0xC, 0x10 and 0x14.

        Returns:
            none

        Notes:
            - The chroma row stride is half the luma row stride.
            - Luma stores carry a 128-bit alignment qualifier and chroma
              stores a 64-bit one, except the final luma store, which has
              none. Destination rows are expected to be aligned to match.
            - r4-r6, lr and q4-q7 are saved on entry and restored on exit;
              r2, r3, q0-q3 and q8-q15 are overwritten.

------------------------------------------------------------------------------*/

function h264bsdWriteMacroblock, export=1
    push    {r4-r6, lr}
    vpush   {q4-q7}

@ Fetch the stride source and the three destination plane pointers.
    ldr     width, [image, #IMAGE_WIDTH_OFFSET]
    ldr     luma,  [image, #IMAGE_LUMA_OFFSET]
    ldr     cb,    [image, #IMAGE_CB_OFFSET]
    ldr     cr,    [image, #IMAGE_CR_OFFSET]

@ Luma: every load brings in two 16-byte rows, every store writes one row
@ and then advances the destination by one luma stride. The stride
@ arithmetic is slotted in between the first loads.
    vld1    {qRow0, qRow1}, [data]!
    lsl     width, width, #4                @ luma stride = 16 * loaded value
    vld1    {qRow2, qRow3}, [data]!
    lsr     cwidth, width, #1               @ chroma stride = luma stride / 2
    vst1    {qRow0}, [luma,:128], width
    vld1    {qRow4, qRow5}, [data]!
    vst1    {qRow1}, [luma,:128], width
    vld1    {qRow6, qRow7}, [data]!
    vst1    {qRow2}, [luma,:128], width
    vld1    {qRow8, qRow9}, [data]!
    vst1    {qRow3}, [luma,:128], width
    vld1    {qRow10, qRow11}, [data]!
    vst1    {qRow4}, [luma,:128], width
    vld1    {qRow12, qRow13}, [data]!
    vst1    {qRow5}, [luma,:128], width
    vld1    {qRow14, qRow15}, [data]!
    vst1    {qRow6}, [luma,:128], width

@ Chroma loads reuse q0-q7. Each of those registers has already been
@ stored as luma before it is overwritten here.
    vld1    {qRow0, qRow1}, [data]!         @ Cb rows 0-3 in d0-d3
    vst1    {qRow7}, [luma,:128], width
    vld1    {qRow2, qRow3}, [data]!         @ Cb rows 4-7 in d4-d7
    vst1    {qRow8}, [luma,:128], width
    vld1    {qRow4, qRow5}, [data]!         @ Cr rows 0-3 in d8-d11
    vst1    {qRow9}, [luma,:128], width
    vld1    {qRow6, qRow7}, [data]!         @ Cr rows 4-7 in d12-d15
    vst1    {qRow10}, [luma,:128], width

@ Remaining luma rows 11-15 interleaved with the eight Cb and Cr rows.
    vst1    {dRow0}, [cb,:64], cwidth
    vst1    {dRow8}, [cr,:64], cwidth
    vst1    {qRow11}, [luma,:128], width
    vst1    {dRow1}, [cb,:64], cwidth
    vst1    {dRow9}, [cr,:64], cwidth
    vst1    {qRow12}, [luma,:128], width
    vst1    {dRow2}, [cb,:64], cwidth
    vst1    {dRow10}, [cr,:64], cwidth
    vst1    {qRow13}, [luma,:128], width
    vst1    {dRow3}, [cb,:64], cwidth
    vst1    {dRow11}, [cr,:64], cwidth
    vst1    {qRow14}, [luma,:128], width
    vst1    {dRow4}, [cb,:64], cwidth
    vst1    {dRow12}, [cr,:64], cwidth
    vst1    {qRow15}, [luma]                @ last luma row, no write-back
    vst1    {dRow5}, [cb,:64], cwidth
    vst1    {dRow13}, [cr,:64], cwidth
    vst1    {dRow6}, [cb,:64], cwidth
    vst1    {dRow14}, [cr,:64], cwidth
    vst1    {dRow7}, [cb,:64]               @ last Cb row, no write-back
    vst1    {dRow15}, [cr,:64]              @ last Cr row, no write-back

@ Restore the saved registers; loading pc performs the return.
    vpop    {q4-q7}
    pop     {r4-r6, pc}

    .endfunc