/* omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
 *
 * Syntax : GNU as, ARM (A32) state, NEON.
 * ABI    : AAPCS (r4-r12, lr and d8-d15 are saved and restored here).
 *
 * What the code below does, as far as this file shows:
 *   - Visits 2 bands of 8 rows; inside each band it visits 4 positions that
 *     are 4 bytes apart.  At each position it loads an 8 row x 8 byte tile
 *     starting 4 bytes to the left of the position, builds per-byte masks,
 *     calls one of two external filter helpers, and stores the tile back.
 *   - A position is skipped entirely when the halfword read from r4 is 0.
 *
 * Inputs (names in brackets follow the OpenMAX DL prototype of the same
 * name -- NOTE(review): confirm against the project header):
 *   r0        tile base pointer, read and written in place   [pSrcDst]
 *   r1        byte distance between consecutive rows         [srcdstStep]
 *   r2        pointer to 2 bytes; byte 0 is used for the first position
 *             of each band, byte 1 for the other three       [pAlpha]
 *   r3        pointer to 2 bytes, used the same way as r2    [pBeta]
 *   [sp, #0]  pointer kept in r5                             [pThresholds]
 *   [sp, #4]  pointer kept in r4, read one halfword at a
 *             time with a post-increment of 4                [pBS]
 *
 * Output:
 *   r0 = 0 (presumably the OMX no-error status -- confirm).
 *
 * Working registers:
 *   r6  = 0 (source for clearing mask halves)
 *   r7  = r2 + 1,  r8 = r3 + 1
 *   r9  = loop counter, see comment at its initialisation
 *   r10 = pointer to the odd rows of the tile (r0 + r1)
 *   r11 = 2 * r1
 *   r12 = halfword most recently read through r4
 *   d0  = byte from r2/r7 replicated to all lanes
 *   d2  = byte from r3/r8 replicated to all lanes
 *   d14 = 0, d15 = 1 in every byte lane; not read in this file, presumably
 *         constants consumed by the helpers -- confirm.
 *
 * External helpers (defined outside this file):
 *   armVCM4P10_DeblockingLumabSLT4_unsafe
 *   armVCM4P10_DeblockingLumabSGE4_unsafe
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
    @ Mark the export as a function (STT_FUNC) so the linker can apply
    @ ARM/Thumb interworking to calls that target it.
    .type   omxVCM4P10_FilterDeblockingLuma_VerEdge_I, %function
    .func   omxVCM4P10_FilterDeblockingLuma_VerEdge_I
omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
    PUSH     {r4-r12,lr}            @ 10 words  = 40 bytes
    VPUSH    {d8-d15}               @ 8 dwords  = 64 bytes, total 0x68
    ADD      r7,r2,#1               @ r7 = address of second byte behind r2
    ADD      r8,r3,#1               @ r8 = address of second byte behind r3
    VLD1.8   {d0[]},[r2]            @ d0 = first byte behind r2, all lanes
    SUB      r0,r0,#4               @ tile starts 4 bytes left of the position
    VLD1.8   {d2[]},[r3]            @ d2 = first byte behind r3, all lanes
    LDR      r4,[sp,#0x6c]          @ second stack argument (0x68 + 4)
    LDR      r5,[sp,#0x68]          @ first stack argument
    MOV      r6,#0
    VMOV.I8  d14,#0
    VMOV.I8  d15,#0x1
    @ r9 is doubled once per position.  Starting from 0x11000000 the
    @ doubling carries out on the 4th and 8th step; the result is still
    @ non-zero after the 4th (0x10000000) and zero after the 8th.  Carry
    @ therefore ends a band and Z ends the whole function.
    MOV      r9,#0x11000000
    ADD      r11,r1,r1              @ r11 = two rows

.Ledge_loop:
    LDRH     r12,[r4],#4            @ two control bytes for this position
    CMP      r12,#0
    BEQ      .Lskip_edge            @ both bytes zero: nothing to filter

    @ Load 8 rows of 8 bytes: even rows through r0, odd rows through r10.
    @ The interleaved VZIP/VTRN sequence rearranges the tile; it appears to
    @ be an 8x8 byte transpose leaving one pixel column per D register
    @ (d7,d6,d5,d4 left of the edge, d8,d9,d10,d11 right of it) -- TODO
    @ confirm lane layout.
    ADD      r10,r0,r1
    VLD1.8   {d7},[r0],r11
    VLD1.8   {d8},[r10],r11
    VLD1.8   {d5},[r0],r11
    VZIP.8   d7,d8
    VLD1.8   {d10},[r10],r11
    VLD1.8   {d6},[r0],r11
    VZIP.8   d5,d10
    VLD1.8   {d9},[r10],r11
    VLD1.8   {d4},[r0],r11
    VLD1.8   {d11},[r10],r11
    VZIP.8   d6,d9
    VZIP.16  d8,d10
    VZIP.8   d4,d11
    SUB      r0,r0,r1,LSL #3        @ rewind r0 to the first row of the tile
    VZIP.16  d7,d5
    VZIP.16  d9,d11
    VZIP.16  d6,d4
    VTRN.32  d8,d9
    VTRN.32  d5,d4
    VTRN.32  d10,d11
    VTRN.32  d7,d6

    @ Build the per-byte masks.  The scalar TSTs are interleaved with the
    @ NEON work; each TST feeds the conditional instruction that follows it,
    @ so the relative order of TST / VMOVEQ / BNE must not change.
    VABD.U8  d13,d4,d8              @ |d4 - d8|
    VABD.U8  d12,d5,d4              @ |d5 - d4|
    VABD.U8  d18,d9,d8              @ |d9 - d8|
    VABD.U8  d19,d6,d4              @ |d6 - d4|
    TST      r12,#0xff              @ low control byte zero?
    VCGT.U8  d16,d0,d13             @ d16 = d0 > |d4 - d8|
    VMAX.U8  d12,d18,d12            @ max(|d9 - d8|, |d5 - d4|)
    VABD.U8  d17,d10,d8             @ |d10 - d8|
    VMOVEQ.32 d16[0],r6             @ yes: clear mask for lanes 0-3
    TST      r12,#0xff00            @ high control byte zero?
    VCGT.U8  d19,d2,d19             @ d19 = d2 > |d6 - d4|
    VCGT.U8  d12,d2,d12             @ d12 = d2 > max(...)
    VMOVEQ.32 d16[1],r6             @ yes: clear mask for lanes 4-7
    VCGT.U8  d17,d2,d17             @ d17 = d2 > |d10 - d8|
    VAND     d16,d16,d12            @ combined filter-enable mask
    TST      r12,#4                 @ bit 2 of the low control byte
    VAND     d12,d16,d17
    VAND     d17,d16,d19
    BNE      .Lcall_bSGE4

    @ Bit 2 clear: bSLT4 helper.  The registers consumed below imply that
    @ the helper leaves its results in d29, d30, d24 and d25 -- confirm.
    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
    VZIP.8   d7,d6
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d10,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1              @ odd-row pointer for the stores
    VZIP.16  d24,d10
    VZIP.16  d25,d11
    VZIP.16  d6,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d10
    VTRN.32  d6,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d10},[r10],r11
    VST1.8   {d6},[r0],r11
    VST1.8   {d25},[r10],r11
    ADDS     r9,r9,r9               @ step counter; C and Z are used below
    VST1.8   {d29},[r0],r11
    @ NOTE(review): only +2 here while the other two paths add 4.  The
    @ rewind by 0xe at the end of a band only balances if the bSLT4 helper
    @ itself advances r5 by 2 -- confirm against the helper source.
    ADD      r5,r5,#2
    @ Post-increment by r1 instead of r11 is harmless: r10 is recomputed
    @ before it is used again.
    VST1.8   {d11},[r10],r1
    SUB      r0,r0,r1,LSL #3        @ back to the first row
    VLD1.8   {d0[]},[r7]            @ second byte behind r2 from now on
    ADD      r0,r0,#4               @ next position in this band
    VLD1.8   {d2[]},[r8]            @ second byte behind r3 from now on
    BCC      .Ledge_loop            @ no carry: band not finished
    B        .Lband_done

.Lskip_edge:
    @ Nothing to filter here: advance all cursors exactly as the filtering
    @ paths do.
    ADD      r0,r0,#4
    ADDS     r9,r9,r9
    VLD1.8   {d0[]},[r7]
    ADD      r5,r5,#4
    VLD1.8   {d2[]},[r8]
    BCC      .Ledge_loop
    B        .Lband_done

.Lcall_bSGE4:
    @ Bit 2 set: bSGE4 helper.  The registers consumed below imply that the
    @ helper leaves its results in d29, d30, d31, d24, d25 and d28 -- confirm.
    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
    VZIP.8   d7,d31
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d28,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1              @ odd-row pointer for the stores
    VZIP.16  d24,d28
    VZIP.16  d25,d11
    VZIP.16  d31,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d28
    VTRN.32  d31,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d28},[r10],r11
    VST1.8   {d31},[r0],r11
    VST1.8   {d25},[r10],r11
    ADDS     r9,r9,r9               @ step counter; C and Z are used below
    VST1.8   {d29},[r0],r11
    ADD      r5,r5,#4
    VST1.8   {d11},[r10],r11
    SUB      r0,r0,r1,LSL #3        @ back to the first row
    VLD1.8   {d0[]},[r7]
    ADD      r0,r0,#4               @ next position in this band
    VLD1.8   {d2[]},[r8]
    BCC      .Ledge_loop            @ no carry: band not finished
                                    @ carry: fall through to .Lband_done

.Lband_done:
    @ Four positions done.  None of the instructions below touch the flags,
    @ so the BNE still sees Z from the last ADDS on r9.
    SUB      r4,r4,#0xe             @ 4 * 4 - 14 = net +2 per band
    SUB      r5,r5,#0xe             @ same net +2 for r5
    SUB      r0,r0,#0x10            @ undo the four +4 steps
    VLD1.8   {d0[]},[r2]            @ first position of a band uses byte 0
    ADD      r0,r0,r1,LSL #3        @ move down 8 rows
    VLD1.8   {d2[]},[r3]
    BNE      .Ledge_loop            @ counter not yet zero: second band
    MOV      r0,#0                  @ return value
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size   omxVCM4P10_FilterDeblockingLuma_VerEdge_I, .-omxVCM4P10_FilterDeblockingLuma_VerEdge_I
    .endfunc

    .end
