;
; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE

    EXPORT h264bsdWriteMacroblock

; Byte offsets of the fields this routine reads from the structure that
; 'image' points to.
; NOTE(review): the structure declaration is not visible in this file;
; keep these values in sync with the C-side definition.
IMAGE_WIDTH         EQU     0x04    ; picture width (presumably in macroblocks)
IMAGE_LUMA          EQU     0x0C    ; destination pointer, luma plane
IMAGE_CB            EQU     0x10    ; destination pointer, cb plane
IMAGE_CR            EQU     0x14    ; destination pointer, cr plane

; The width field is shifted left by this amount to obtain the luma row
; stride in bytes (i.e. multiplied by 16, the number of bytes stored per
; luma row below).
LUMA_STRIDE_SHIFT   EQU     4

; Input / output registers
image   RN  0
data    RN  1
width   RN  2
luma    RN  3
cb      RN  4
cr      RN  5
cwidth  RN  6

; -- NEON registers --

qRow0   QN  Q0.U8
qRow1   QN  Q1.U8
qRow2   QN  Q2.U8
qRow3   QN  Q3.U8
qRow4   QN  Q4.U8
qRow5   QN  Q5.U8
qRow6   QN  Q6.U8
qRow7   QN  Q7.U8
qRow8   QN  Q8.U8
qRow9   QN  Q9.U8
qRow10  QN  Q10.U8
qRow11  QN  Q11.U8
qRow12  QN  Q12.U8
qRow13  QN  Q13.U8
qRow14  QN  Q14.U8
qRow15  QN  Q15.U8

dRow0   DN  D0.U8
dRow1   DN  D1.U8
dRow2   DN  D2.U8
dRow3   DN  D3.U8
dRow4   DN  D4.U8
dRow5   DN  D5.U8
dRow6   DN  D6.U8
dRow7   DN  D7.U8
dRow8   DN  D8.U8
dRow9   DN  D9.U8
dRow10  DN  D10.U8
dRow11  DN  D11.U8
dRow12  DN  D12.U8
dRow13  DN  D13.U8
dRow14  DN  D14.U8
dRow15  DN  D15.U8

;/*------------------------------------------------------------------------------
;
;    Function: h264bsdWriteMacroblock
;
;        Functional description:
;            Write one macroblock into the image. Both luma and chroma
;            components will be written at the same time.
;
;        Inputs:
;            data    (r1) pointer to macroblock data to be written, 256
;                    values for luma followed by 64 values for both chroma
;                    components (384 bytes are read in total)
;
;        Outputs:
;            image   (r0) pointer to the image where the macroblock will be
;                    written. The width field and the luma/cb/cr destination
;                    pointers are read from it; the structure itself is not
;                    modified.
;
;        Returns:
;            none
;
;        Memory written:
;            luma    16 rows of 16 bytes, row stride = width << 4 bytes
;            cb, cr  8 rows of 8 bytes each, row stride = half the luma stride
;
;        Alignment:
;            The luma destination must be 16-byte aligned and the cb/cr
;            destinations 8-byte aligned (the stores carry @128 / @64
;            alignment qualifiers). No alignment is required of 'data'.
;
;        Registers:
;            Saved and restored:  r4-r6, lr, q4-q7
;            Modified:            r1 (advanced past the data read), r2, r3,
;                                 q0-q3, q8-q15
;
;------------------------------------------------------------------------------*/

h264bsdWriteMacroblock
    PUSH    {r4-r6,lr}
    VPUSH   {q4-q7}                     ; q4-q7 are overwritten below

    LDR     width, [image, #IMAGE_WIDTH]
    LDR     luma, [image, #IMAGE_LUMA]
    LDR     cb, [image, #IMAGE_CB]
    LDR     cr, [image, #IMAGE_CR]


;   Write luma
;   Loads from 'data' are interleaved with the stores of rows loaded
;   earlier. The instruction order is intentional; keep it when editing.
    VLD1    {qRow0, qRow1}, [data]!
    LSL     width, width, #LUMA_STRIDE_SHIFT    ; width = luma row stride in bytes
    VLD1    {qRow2, qRow3}, [data]!
    LSR     cwidth, width, #1                   ; cwidth = chroma row stride in bytes
    VST1    {qRow0}, [luma@128], width
    VLD1    {qRow4, qRow5}, [data]!
    VST1    {qRow1}, [luma@128], width
    VLD1    {qRow6, qRow7}, [data]!
    VST1    {qRow2}, [luma@128], width
    VLD1    {qRow8, qRow9}, [data]!
    VST1    {qRow3}, [luma@128], width
    VLD1    {qRow10, qRow11}, [data]!
    VST1    {qRow4}, [luma@128], width
    VLD1    {qRow12, qRow13}, [data]!
    VST1    {qRow5}, [luma@128], width
    VLD1    {qRow14, qRow15}, [data]!
    VST1    {qRow6}, [luma@128], width

;   q0-q7 are reloaded with chroma once their luma contents have been
;   stored. Each Q register holds two 8-byte chroma rows, so cb rows 0-7
;   end up in d0-d7 and cr rows 0-7 in d8-d15.
    VLD1    {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3
    VST1    {qRow7}, [luma@128], width
    VLD1    {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7
    VST1    {qRow8}, [luma@128], width
    VLD1    {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3
    VST1    {qRow9}, [luma@128], width
    VLD1    {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7
    VST1    {qRow10}, [luma@128], width
    VST1    {dRow0}, [cb@64], cwidth
    VST1    {dRow8}, [cr@64], cwidth
    VST1    {qRow11}, [luma@128], width
    VST1    {dRow1}, [cb@64], cwidth
    VST1    {dRow9}, [cr@64], cwidth
    VST1    {qRow12}, [luma@128], width
    VST1    {dRow2}, [cb@64], cwidth
    VST1    {dRow10}, [cr@64], cwidth
    VST1    {qRow13}, [luma@128], width
    VST1    {dRow3}, [cb@64], cwidth
    VST1    {dRow11}, [cr@64], cwidth
    VST1    {qRow14}, [luma@128], width
    VST1    {dRow4}, [cb@64], cwidth
    VST1    {dRow12}, [cr@64], cwidth
    VST1    {qRow15}, [luma@128]        ; last luma row: no post-increment needed;
                                        ; same alignment hint as the other rows
    VST1    {dRow5}, [cb@64], cwidth
    VST1    {dRow13}, [cr@64], cwidth
    VST1    {dRow6}, [cb@64], cwidth
    VST1    {dRow14}, [cr@64], cwidth
    VST1    {dRow7}, [cb@64]            ; last chroma rows: no post-increment needed
    VST1    {dRow15}, [cr@64]

    VPOP    {q4-q7}
    POP     {r4-r6,pc}
    END