/* omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * omxVCM4P10_FilterDeblockingChroma_VerEdge_I
 *
 * Syntax / target: GNU as, ARM (A32) instruction set, NEON (.arm, .fpu neon).
 *
 * What the code does
 *   The loop body runs exactly twice (see r7 below). Each pass:
 *     1. loads 8 rows of 8 bytes starting 4 bytes to the left of the current
 *        edge position and transposes them so that every D register holds
 *        one pixel column (8 rows),
 *     2. builds per-pixel filter masks from the two byte thresholds in d0/d2
 *        and from four strength bytes read through r4,
 *     3. calls up to two external helpers to compute filtered values,
 *     4. writes back only column 3 and column 4 (the pixel on either side of
 *        the edge), then moves 4 bytes to the right for the next pass.
 *
 * Inputs (register usage as seen in this file)
 *   r0      pointer to the first edge pixel (row 0, right-hand side of edge)
 *   r1      row stride in bytes
 *   r2      pointer to bytes; byte [0] is broadcast into d0 for pass 1,
 *           byte [1] is what later reloads through r2 pick up
 *   r3      same layout as r2, broadcast into d2
 *   [sp+0]  (5th argument) pointer kept in r5; not dereferenced in this
 *           file, only advanced by 4 on the skip path
 *   [sp+4]  (6th argument) pointer kept in r4; one 32-bit word (four
 *           strength bytes) is read per pass, the pointer advances by 8
 *   NOTE(review): in the OpenMAX DL prototype these are presumably
 *   pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds and pBS - confirm
 *   against the OpenMAX DL header.
 *
 * Output
 *   r0 = 0 on return (presumably OMX_Sts_NoErr - confirm). Pixels are
 *   modified in place.
 *
 * Preserved: r4-r12 and d8-d15 are saved and restored here.
 * lr is saved on entry and used as scratch (row step and BL link).
 *
 * External helpers (defined elsewhere, register contract not visible here)
 *   armVCM4P10_DeblockingChromabSLT4_unsafe
 *   armVCM4P10_DeblockingChromabSGE4_unsafe
 *   NOTE(review): on the filtering path this file neither reloads d0/d2 for
 *   the second pass nor advances r5; presumably the helpers do both, and
 *   they appear to return results in d29/d24 and d13/d31 respectively.
 *   Verify against the helper source before changing anything here.
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

    .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I
    .func   omxVCM4P10_FilterDeblockingChroma_VerEdge_I
omxVCM4P10_FilterDeblockingChroma_VerEdge_I:
    PUSH        {r4-r12,lr}             @ 10 core registers = 40 bytes
    VPUSH       {d8-d15}                @ 8 D registers = 64 bytes, 104 (0x68) in total
    VLD1.8      {d0[]},[r2]!            @ d0 = byte [r2] in all lanes; r2 += 1
    SUB         r0,r0,#4                @ start 4 bytes left so 8-byte rows straddle the edge
    VLD1.8      {d2[]},[r3]!            @ d2 = byte [r3] in all lanes; r3 += 1
    LDR         r4,[sp,#0x6c]           @ r4 = 6th argument (0x68 + 4 above saved registers)
    LDR         r5,[sp,#0x68]           @ r5 = 5th argument (first stack argument)
    LDR         r8, =0x4040404          @ per-byte test mask for bit 2 of each strength byte
    LDR         r9, =0x3030303          @ per-byte test mask for bits 0-1 of each strength byte
    VMOV.I8     d14,#0                  @ constant 0 bytes (not read in this file, likely for helpers)
    VMOV.I8     d15,#0x1                @ constant 1 bytes (not read in this file, likely for helpers)
    VMOV.I16    d1,#0x4                 @ constant 4 in each halfword, used by VTST below
    MOV         r7,#0x40000000          @ pass counter: doubling gives 0x80000000 then 0, so 2 passes

    /* ---- one pass per vertical edge ---------------------------------- */
L0x34:
    LDR         r6,[r4],#8              @ r6 = four strength bytes; r4 += 8
    ADD         r10,r0,r1               @ r10 -> row 1
    ADD         lr,r1,r1                @ lr = 2 * stride (even/odd row interleave)
    VLD1.8      {d7},[r0],lr            @ row 0
    VLD1.8      {d8},[r10],lr           @ row 1
    VLD1.8      {d5},[r0],lr            @ row 2
    VLD1.8      {d10},[r10],lr          @ row 3
    VLD1.8      {d6},[r0],lr            @ row 4
    VLD1.8      {d9},[r10],lr           @ row 5
    VLD1.8      {d4},[r0],lr            @ row 6
    VLD1.8      {d11},[r10],lr          @ row 7; r0 is now 8 rows below where it started

    /*
     * 8x8 byte transpose. Afterwards each register holds one column:
     *   d7 = col 0, d6 = col 1, d5 = col 2, d4 = col 3,
     *   d8 = col 4, d9 = col 5, d10 = col 6, d11 = col 7.
     * The edge lies between col 3 (d4) and col 4 (d8).
     */
    VZIP.8      d7,d8
    VZIP.8      d5,d10
    VZIP.8      d6,d9
    VZIP.8      d4,d11
    VZIP.16     d7,d5
    VZIP.16     d8,d10
    VZIP.16     d6,d4
    VZIP.16     d9,d11
    VTRN.32     d7,d6
    VTRN.32     d5,d4
    VTRN.32     d10,d11
    VTRN.32     d8,d9

    CMP         r6,#0                   @ all four strength bytes zero?
    VABD.U8     d19,d6,d4               @ d19 = |col1 - col3|
    VABD.U8     d13,d4,d8               @ d13 = |col3 - col4| (difference across the edge)
    BEQ         L0x170                  @ yes: nothing to filter for this edge

    VABD.U8     d12,d5,d4               @ d12 = |col2 - col3|
    VABD.U8     d18,d9,d8               @ d18 = |col5 - col4|
    VMOV.32     d26[0],r6               @ strength bytes into the low word of d26
    VCGT.U8     d16,d0,d13              @ d16 = d0 > |col3 - col4|
    VMAX.U8     d12,d18,d12             @ d12 = max(|col5 - col4|, |col2 - col3|)
    VMOVL.U8    q13,d26                 @ widen to halfwords: each strength now covers 2 rows in d26
    VABD.U8     d17,d10,d8              @ d17 = |col6 - col4|
    VCGT.S16    d27,d26,#0              @ d27 = mask of rows whose strength is > 0
    VCGT.U8     d12,d2,d12              @ d12 = d2 > max(...) from above
    VCGT.U8     d19,d2,d19              @ d19 = d2 > |col1 - col3|
    VAND        d16,d16,d27             @ filter mask: needs non-zero strength ...
    TST         r6,r9                   @ flags for first BLNE: any strength byte with bits 0-1 set
    VCGT.U8     d17,d2,d17              @ d17 = d2 > |col6 - col4|
    VAND        d16,d16,d12             @ ... and both threshold tests; d16 = final per-row mask
    VAND        d12,d16,d17             @ d12 = d16 restricted by the col6 test (not read in this file)
    VAND        d17,d16,d19             @ d17 = d16 restricted by the col1 test (not read in this file)
    BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
    TST         r6,r8                   @ flags for second BLNE: any strength byte with bit 2 set
    SUB         r0,r0,r1,LSL #3         @ rewind r0 by 8 rows (flags untouched)
    VTST.16     d26,d26,d1              @ d26 = mask of rows whose strength has bit 2 set
    BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

    /*
     * Merge results. If the second helper was skipped, d26 is all zero and
     * the VBITs change nothing. Rows outside the filter mask d16 get their
     * original pixels back through VBIF.
     */
    VBIT        d29,d13,d26             @ col 3 result: take d13 where d26 is set
    VBIT        d24,d31,d26             @ col 4 result: take d31 where d26 is set
    ADD         r10,r0,#3               @ r10 -> row 0, col 3
    VBIF        d29,d4,d16              @ keep original col 3 where d16 is clear
    ADD         r12,r10,r1              @ r12 -> row 1, col 3
    ADD         lr,r1,r1                @ lr = 2 * stride again (BL overwrote it)
    VBIF        d24,d8,d16              @ keep original col 4 where d16 is clear
    ADDS        r7,r7,r7                @ advance pass counter; flags consumed by BNE below
    VST1.8      {d29[0]},[r10],lr       @ col 3, rows 0..7 (even rows via r10, odd via r12)
    VST1.8      {d29[1]},[r12],lr
    VST1.8      {d29[2]},[r10],lr
    VST1.8      {d29[3]},[r12],lr
    VST1.8      {d29[4]},[r10],lr
    VST1.8      {d29[5]},[r12],lr
    VST1.8      {d29[6]},[r10],lr
    VST1.8      {d29[7]},[r12],lr
    ADD         r12,r0,#4               @ r12 -> row 0, col 4
    ADD         r10,r12,r1              @ r10 -> row 1, col 4
    VST1.8      {d24[0]},[r12],lr       @ col 4, rows 0..7 (even rows via r12, odd via r10)
    VST1.8      {d24[1]},[r10],lr
    VST1.8      {d24[2]},[r12],lr
    VST1.8      {d24[3]},[r10],lr
    VST1.8      {d24[4]},[r12],lr
    VST1.8      {d24[5]},[r10],lr
    VST1.8      {d24[6]},[r12],lr
    VST1.8      {d24[7]},[r10],lr
    ADD         r0,r0,#4                @ next edge is 4 bytes to the right
    BNE         L0x34                   @ second pass if the counter is still non-zero
    MOV         r0,#0                   @ return value 0
    VPOP        {d8-d15}
    POP         {r4-r12,pc}

    /* ---- skip path: all four strength bytes were zero ----------------- */
L0x170:
    VLD1.8      {d0[]},[r2]             @ reload d0 from the byte r2 now points at (no increment)
    ADD         r0,r0,#4                @ next edge is 4 bytes to the right ...
    SUB         r0,r0,r1,LSL #3         @ ... and back up the 8 rows consumed by the loads
    ADDS        r7,r7,r7                @ advance pass counter; flags consumed by BNE below
    VLD1.8      {d2[]},[r3]             @ reload d2 from the byte r3 now points at (no increment)
    ADD         r5,r5,#4                @ step over four bytes of the 5th-argument array
    BNE         L0x34
    MOV         r0,#0                   @ return value 0
    VPOP        {d8-d15}
    POP         {r4-r12,pc}
    .endfunc

    .end
