/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * armVCM4P10_Interpolate_Chroma
 *
 * Syntax : GNU as, ARM (A32) instruction set, NEON.
 * Purpose: weighted 2x2 (bilinear) interpolation of a block of bytes, or a
 *          plain row copy when both fractional offsets are zero.
 *          NOTE(review): the name suggests H.264 (MPEG-4 Part 10) chroma
 *          motion compensation; confirm against the C prototype.
 *
 * Inputs, as the code consumes them (parameter names are descriptive only,
 * TODO confirm against the header that declares this function):
 *   r0        source address, advanced by the loads
 *   r1        source row stride in bytes
 *   r2        destination address, advanced by the stores
 *   r3        destination row stride in bytes
 *   [sp, #0]  block width; selects a code path through the branch tables
 *             (entries exist for 2, 4 and 8)
 *   [sp, #4]  row count; decremented by 4 per iteration on the 8-wide
 *             weighted path and by 2 on every other path
 *   [sp, #8]  horizontal fraction fx, weights are formed from fx and 8 - fx
 *   [sp, #12] vertical fraction fy, weights are formed from fy and 8 - fy
 *
 * Output:
 *   r0 = 0 on every return path.
 *
 * Registers:
 *   r4-r12, lr and d8-d15 are saved in the prologue and restored on return.
 *   d0-d7 and d16-d25 are overwritten and not restored.
 *
 * Memory access notes:
 *   - Every source load fetches 8 bytes, whatever the selected width.
 *   - All loops except the 8-wide copy loop load the row that follows the
 *     last row they store.
 *   - Loops run while the remaining row count is > 0 after the decrement,
 *     so rows are always produced in whole groups of 4 (8-wide weighted
 *     path) or 2 (all other paths).
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon

/*
 * Stack layout after the prologue: PUSH {r4-r12,lr} stores 10 words and
 * VPUSH {d8-d15} stores 8 doublewords, so the caller's stacked arguments
 * start SAVED_REGS_BYTES above sp.
 */
    .equ    SAVED_REGS_BYTES,   (10 * 4) + (8 * 8)      /* = 0x68 */
    .equ    ARG_SIZE_OFFSET,    SAVED_REGS_BYTES + 0    /* width, row count */
    .equ    ARG_FRAC_OFFSET,    SAVED_REGS_BYTES + 8    /* fx, fy */

    .text
    .align 4

/*
 * Branch tables. The dispatch code loads the word at byte offset
 * (width << 1), so width 2 selects word 1, width 4 selects word 2 and
 * width 8 selects word 4. Words 0 and 3 repeat their neighbouring entry.
 * Each entry is the distance from (P0 + 8) to the target, because reading
 * pc in the ADD at P0 yields the address of that instruction plus 8.
 */
armVCM4P10_WidthBranchTableMVIsNotZero:
    .word   WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8)
    .word   WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8)
    .word   WidthIs8MVIsNotZero-(P0+8)

armVCM4P10_WidthBranchTableMVIsZero:
    .word   WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8)
    .word   WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8)
    .word   WidthIs8MVIsZero-(P0+8)

    .global armVCM4P10_Interpolate_Chroma
    /* Type the symbol as a function so ELF tools and the linker treat it as code. */
    .type   armVCM4P10_Interpolate_Chroma, %function
    .func   armVCM4P10_Interpolate_Chroma
armVCM4P10_Interpolate_Chroma:
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d15}
    LDRD     r6,r7,[sp,#ARG_FRAC_OFFSET]        /* r6 = fx, r7 = fy */
    LDRD     r4,r5,[sp,#ARG_SIZE_OFFSET]        /* r4 = width, r5 = row count */
    RSB      r8,r6,#8                           /* r8 = 8 - fx */
    RSB      r9,r7,#8                           /* r9 = 8 - fy */
    CMN      r6,r7                              /* Z set when fx + fy == 0 */
    MOV      r10,#1
    ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
    SUB      lr,r1,r10                          /* lr = source stride - 1 */
    ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero
    VLD1.8   {d0},[r0],r10                      /* d0 = row 0, bytes 0..7 */
    SMULBB   r12,r8,r9                          /* (8 - fx) * (8 - fy) */
    SMULBB   r9,r6,r9                           /* fx * (8 - fy) */
    VLD1.8   {d1},[r0],lr                       /* d1 = row 0, bytes 1..8; r0 -> row 1 */
    SMULBB   r8,r8,r7                           /* (8 - fx) * fy */
    SMULBB   r6,r6,r7                           /* fx * fy */
    VDUP.8   d12,r12                            /* weight: current row, byte x */
    VDUP.8   d13,r9                             /* weight: current row, byte x + 1 */
    VDUP.8   d14,r8                             /* weight: next row, byte x */
    VDUP.8   d15,r6                             /* weight: next row, byte x + 1 */
    LDR      r11,[r11, r4, lsl #1]              /* table entry for this width */
P0: ADD      pc,r11                             /* jump to the selected path */

/*
 * Weighted path, 8 bytes per row, 4 rows per iteration.
 * On entry to each iteration d0/d1 hold the current row at byte x and
 * x + 1. Rows n+1..n+3 go to d2/d3, d16/d17, d18/d19, and row n+4 is
 * loaded back into d0/d1 for the fourth output row and the next iteration.
 * Accumulators: q2, q3, q11, q12 (one per output row). Each result is
 * narrowed with a rounding, saturating right shift by 6.
 */
WidthIs8MVIsNotZero:
    VLD1.8   {d2},[r0],r10
    VMULL.U8 q2,d0,d12
    VLD1.8   {d3},[r0],lr
    VMULL.U8 q3,d2,d12
    VLD1.8   {d16},[r0],r10
    VMLAL.U8 q2,d1,d13
    VLD1.8   {d17},[r0],lr
    VMULL.U8 q11,d16,d12
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d18},[r0],r10
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q11,d17,d13
    VMULL.U8 q12,d18,d12
    VLD1.8   {d19},[r0],lr
    VMLAL.U8 q3,d16,d14
    VLD1.8   {d0},[r0],r10
    VMLAL.U8 q12,d19,d13
    VMLAL.U8 q11,d18,d14
    VMLAL.U8 q2,d3,d15
    VLD1.8   {d1},[r0],lr
    VMLAL.U8 q12,d0,d14
    VMLAL.U8 q3,d17,d15
    VMLAL.U8 q11,d19,d15
    SUBS     r5,r5,#4                           /* four rows consumed */
    VMLAL.U8 q12,d1,d15
    VQRSHRN.U16 d8,q2,#6
    VQRSHRN.U16 d9,q3,#6
    VQRSHRN.U16 d20,q11,#6
    VST1.64  {d8},[r2],r3
    VQRSHRN.U16 d21,q12,#6
    VST1.64  {d9},[r2],r3
    VST1.64  {d20},[r2],r3
    VST1.64  {d21},[r2],r3
    BGT      WidthIs8MVIsNotZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

/*
 * Weighted path, 4 bytes stored per row, 2 rows per iteration.
 * d0/d1 = current row, d2/d3 = next row, then d0/d1 are reloaded with the
 * row after that. Only lane 0 (32 bits) of each narrowed result is stored.
 */
WidthIs4MVIsNotZero:
    VLD1.8   {d2},[r0],r10
    VMULL.U8 q2,d0,d12
    VMULL.U8 q3,d2,d12
    VLD1.8   {d3},[r0],lr
    VMLAL.U8 q2,d1,d13
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d0},[r0],r10
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q3,d0,d14
    VLD1.8   {d1},[r0],lr
    SUBS     r5,r5,#2                           /* two rows consumed */
    VMLAL.U8 q3,d1,d15
    VMLAL.U8 q2,d3,d15
    VQRSHRN.U16 d9,q3,#6
    VQRSHRN.U16 d8,q2,#6
    VST1.32  {d8[0]},[r2],r3
    VST1.32  {d9[0]},[r2],r3
    BGT      WidthIs4MVIsNotZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

/*
 * Weighted path, 2 bytes stored per row, 2 rows per iteration.
 * Same arithmetic as the 4-wide path; only lane 0 (16 bits) is stored.
 */
WidthIs2MVIsNotZero:
    VLD1.8   {d2},[r0],r10
    VMULL.U8 q2,d0,d12
    VMULL.U8 q3,d2,d12
    VLD1.8   {d3},[r0],lr
    VMLAL.U8 q2,d1,d13
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d0},[r0],r10
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q3,d0,d14
    VLD1.8   {d1},[r0],lr
    SUBS     r5,r5,#2                           /* two rows consumed */
    VMLAL.U8 q3,d1,d15
    VMLAL.U8 q2,d3,d15
    VQRSHRN.U16 d9,q3,#6
    VQRSHRN.U16 d8,q2,#6
    VST1.16  {d8[0]},[r2],r3
    VST1.16  {d9[0]},[r2],r3
    BGT      WidthIs2MVIsNotZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

/*
 * Copy path, 8 bytes per row, 2 rows per iteration.
 * The entry code already advanced r0 by one source stride, so step back
 * to row 0 and reload from there.
 */
WidthIs8MVIsZero:
    SUB      r0,r0,r1
WidthIs8LoopMVIsZero:
    VLD1.8   {d0},[r0],r1
    SUBS     r5,r5,#2                           /* two rows consumed */
    VLD1.8   {d1},[r0],r1
    VST1.64  {d0},[r2],r3
    VST1.64  {d1},[r2],r3
    BGT      WidthIs8LoopMVIsZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

/*
 * Copy path, 4 bytes stored per row, 2 rows per iteration.
 * d0 already holds row 0 from the entry code. Each iteration loads the
 * next row into d1, stores d0, loads the row after into d0, stores d1.
 */
WidthIs4MVIsZero:
    VLD1.8   {d1},[r0],r1
    SUBS     r5,r5,#2                           /* two rows consumed */
    VST1.32  {d0[0]},[r2],r3
    VLD1.8   {d0},[r0],r1
    VST1.32  {d1[0]},[r2],r3
    BGT      WidthIs4MVIsZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

/*
 * Copy path, 2 bytes stored per row, 2 rows per iteration.
 * Same structure as the 4-wide copy; only lane 0 (16 bits) is stored.
 */
WidthIs2MVIsZero:
    VLD1.8   {d1},[r0],r1
    SUBS     r5,r5,#2                           /* two rows consumed */
    VST1.16  {d0[0]},[r2],r3
    VLD1.8   {d0},[r0],r1
    VST1.16  {d1[0]},[r2],r3
    BGT      WidthIs2MVIsZero
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .endfunc
    .size   armVCM4P10_Interpolate_Chroma, .-armVCM4P10_Interpolate_Chroma

    .end