/* omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
 *
 * Target: ARM state (.arm), NEON (.fpu neon), GNU assembler syntax.
 *
 * In-place filter across four vertical edges, 4 bytes apart, over 16 rows.
 * The work is done as two passes of 8 rows; inside each pass the four edges
 * are visited left to right.  For every (edge, 8-row group) the routine
 * loads an 8x8 byte tile straddling the edge, transposes it so each pixel
 * column sits in one D register, builds the filter masks, calls one of two
 * external helpers to do the arithmetic, transposes back and stores.
 *
 * Arguments as this code consumes them.  Parameter names follow the OpenMAX
 * DL prototype, which is not visible in this file -- NOTE(review): confirm
 * against the header.
 *   r0       pSrcDst      first byte to the right of edge 0, row 0
 *   r1       srcdstStep   byte distance between consecutive rows
 *   r2       pAlpha       2 bytes: [0] used for edge 0, [1] for edges 1..3
 *   r3       pBeta        2 bytes, same usage as pAlpha
 *   stack+0  pThresholds  kept in r5; not dereferenced here, presumably read
 *                         by the bS<4 helper (see the r5 note below)
 *   stack+4  pBS          kept in r4; one halfword is read per
 *                         (edge, 8-row group) at pBS + 4*edge + 2*group
 * Returns r0 = 0.
 *
 * Register roles inside the loop:
 *   r4   strength pointer, +4 per edge, net +2 between the two row groups
 *   r5   threshold pointer, must advance 4 per edge and net +2 per group
 *   r6   constant 0, source for the conditional lane clears
 *   r7   pAlpha + 1          r8   pBeta + 1
 *   r9   loop counter, see the comment at its initialisation
 *   r10  odd-row pointer (r0 + step), rebuilt before every use
 *   r11  2 * step
 *   r12  current strength halfword
 *   d0   alpha broadcast     d2   beta broadcast
 *   d14  all bytes 0, d15 all bytes 1: set once, not used in this file,
 *        presumably constants expected by the helpers -- TODO confirm
 *
 * Saves and restores r4-r12, lr and d8-d15.  The saved area is 40 + 64 =
 * 104 bytes, which is why the two stack arguments are read at sp+0x68 and
 * sp+0x6c.
 */

    /* Build attributes 24 and 25: Tag_ABI_align_needed and
     * Tag_ABI_align_preserved in the ARM EABI build-attributes addendum. */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
    PUSH     {r4-r12,lr}            /* 10 words = 40 bytes */
    VPUSH    {d8-d15}               /* 8 doublewords = 64 bytes */
    ADD      r7,r2,#1               /* r7 = &pAlpha[1] */
    ADD      r8,r3,#1               /* r8 = &pBeta[1] */
    VLD1.8   {d0[]},[r2]            /* d0 = pAlpha[0] in every byte lane */
    SUB      r0,r0,#4               /* tile starts 4 bytes left of the edge */
    VLD1.8   {d2[]},[r3]            /* d2 = pBeta[0] in every byte lane */
    LDR      r4,[sp,#0x6c]          /* second stack argument (pBS) */
    LDR      r5,[sp,#0x68]          /* first stack argument (pThresholds) */
    MOV      r6,#0
    VMOV.I8  d14,#0
    VMOV.I8  d15,#0x1
    /* r9 is doubled once per edge with ADDS.  Starting from 0x11000000 the
     * carry flag is set on the 4th and 8th doubling and the result is zero
     * only on the 8th, so C marks "end of an 8-row group" and Z marks
     * "all done".  No instruction between the ADDS and the BCC / BNE that
     * test it writes the flags. */
    MOV      r9,#0x11000000
    ADD      r11,r1,r1              /* r11 = 2 * step */

/* ---- one (edge, 8-row group) per iteration ---- */
L0x38:
    LDRH     r12,[r4],#4            /* two strength bytes; r4 -> next edge */
    CMP      r12,#0
    BEQ      L0x160                 /* both zero: nothing to filter here */

    /* Load 8 rows of 8 bytes.  r0 walks the even rows, r10 the odd rows,
     * each stepping by 2*step.  Row order in registers:
     *   d7,d8,d5,d10,d6,d9,d4,d11 = rows 0..7.
     * The VZIP.8 / VZIP.16 / VTRN.32 network interleaved with the loads
     * transposes the tile, leaving one pixel column per register:
     *   d7,d6,d5,d4 = columns 0..3 (left of the edge,  p3 p2 p1 p0)
     *   d8,d9,d10,d11 = columns 4..7 (right of the edge, q0 q1 q2 q3)
     * with byte lane i holding row i. */
    ADD      r10,r0,r1
    VLD1.8   {d7},[r0],r11
    VLD1.8   {d8},[r10],r11
    VLD1.8   {d5},[r0],r11
    VZIP.8   d7,d8
    VLD1.8   {d10},[r10],r11
    VLD1.8   {d6},[r0],r11
    VZIP.8   d5,d10
    VLD1.8   {d9},[r10],r11
    VLD1.8   {d4},[r0],r11
    VLD1.8   {d11},[r10],r11
    VZIP.8   d6,d9
    VZIP.16  d8,d10
    VZIP.8   d4,d11
    SUB      r0,r0,r1,LSL #3        /* rewind r0 to row 0 of the tile */
    VZIP.16  d7,d5
    VZIP.16  d9,d11
    VZIP.16  d6,d4
    VTRN.32  d8,d9
    VTRN.32  d5,d4
    VTRN.32  d10,d11
    VTRN.32  d7,d6

    /* Per-row masks (0xff = true), interleaved with the strength tests:
     *   d16 = alpha > |p0-q0|, then lanes 0-3 cleared if (r12 & 0x00ff)==0
     *         and lanes 4-7 cleared if (r12 & 0xff00)==0,
     *         then ANDed with beta > max(|p1-p0|, |q1-q0|)
     *   d12 = d16 AND beta > |q2-q0|
     *   d17 = d16 AND beta > |p2-p0|
     * VMOVEQ executes only when the preceding TST left Z set. */
    VABD.U8  d13,d4,d8              /* |p0-q0| */
    VABD.U8  d12,d5,d4              /* |p1-p0| */
    VABD.U8  d18,d9,d8              /* |q1-q0| */
    VABD.U8  d19,d6,d4              /* |p2-p0| */
    TST      r12,#0xff
    VCGT.U8  d16,d0,d13
    VMAX.U8  d12,d18,d12
    VABD.U8  d17,d10,d8             /* |q2-q0| */
    VMOVEQ.32 d16[0],r6
    TST      r12,#0xff00
    VCGT.U8  d19,d2,d19
    VCGT.U8  d12,d2,d12
    VMOVEQ.32 d16[1],r6
    VCGT.U8  d17,d2,d17
    VAND     d16,d16,d12
    TST      r12,#4                 /* bit 2 of the low strength byte */
    VAND     d12,d16,d17            /* VAND leaves the TST flags intact */
    VAND     d17,d16,d19
    BNE      L0x17c                 /* bit set: use the SGE4 helper */

    /* ---- bit clear: SLT4 helper ---- */
    /* External routine; its register contract is not visible in this file.
     * The code below takes columns 0..7 from
     *   d7, d6, d30, d29, d24, d25, d10, d11
     * i.e. columns 2..5 come back in d30, d29, d24, d25 and the outer four
     * registers are stored as they were loaded. */
    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
    /* Transpose columns back to rows; afterwards
     *   d7,d24,d30,d10,d6,d25,d29,d11 = rows 0..7. */
    VZIP.8   d7,d6
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d10,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1
    VZIP.16  d24,d10
    VZIP.16  d25,d11
    VZIP.16  d6,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d10
    VTRN.32  d6,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d10},[r10],r11
    VST1.8   {d6},[r0],r11
    VST1.8   {d25},[r10],r11
    ADDS     r9,r9,r9               /* advance loop counter, sets C and Z */
    VST1.8   {d29},[r0],r11
    /* Only +2 here while the other two paths add 4: presumably the SLT4
     * helper has already post-incremented r5 by 2 -- TODO confirm against
     * the helper source. */
    ADD      r5,r5,#2
    /* Post-increment is r1 here but r11 in the SGE4 path.  Harmless: r10 is
     * recomputed from r0 before its next use. */
    VST1.8   {d11},[r10],r1
    SUB      r0,r0,r1,LSL #3        /* rewind r0 to row 0 */
    VLD1.8   {d0[]},[r7]            /* following edges use pAlpha[1] */
    ADD      r0,r0,#4               /* next edge is 4 bytes to the right */
    VLD1.8   {d2[]},[r8]            /* following edges use pBeta[1] */
    BCC      L0x38                  /* more edges in this 8-row group */
    B        L0x1f0

/* ---- both strength bytes zero: skip the edge, keep pointers in step ---- */
L0x160:
    ADD      r0,r0,#4
    ADDS     r9,r9,r9
    VLD1.8   {d0[]},[r7]
    ADD      r5,r5,#4
    VLD1.8   {d2[]},[r8]
    BCC      L0x38
    B        L0x1f0

/* ---- strength bit 2 set: SGE4 helper ---- */
L0x17c:
    /* External routine; contract not visible here.  The code below takes
     * columns 0..7 from
     *   d7, d31, d30, d29, d24, d25, d28, d11
     * i.e. columns 1..6 come back in d31, d30, d29, d24, d25, d28. */
    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
    /* Transpose back; afterwards
     *   d7,d24,d30,d28,d31,d25,d29,d11 = rows 0..7. */
    VZIP.8   d7,d31
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d28,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1
    VZIP.16  d24,d28
    VZIP.16  d25,d11
    VZIP.16  d31,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d28
    VTRN.32  d31,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d28},[r10],r11
    VST1.8   {d31},[r0],r11
    VST1.8   {d25},[r10],r11
    ADDS     r9,r9,r9               /* advance loop counter, sets C and Z */
    VST1.8   {d29},[r0],r11
    ADD      r5,r5,#4
    VST1.8   {d11},[r10],r11
    SUB      r0,r0,r1,LSL #3
    VLD1.8   {d0[]},[r7]
    ADD      r0,r0,#4
    VLD1.8   {d2[]},[r8]
    BCC      L0x38
    /* carry set: fall through to the end-of-group code */

/* ---- end of an 8-row group (4 edges done) ---- */
L0x1f0:
    SUB      r4,r4,#0xe             /* 4*4 - 14: net +2 from group start */
    SUB      r5,r5,#0xe             /* same net +2 for the threshold ptr */
    SUB      r0,r0,#0x10            /* back to edge 0 ... */
    VLD1.8   {d0[]},[r2]            /* edge 0 uses pAlpha[0] again */
    ADD      r0,r0,r1,LSL #3        /* ... and 8 rows further down */
    VLD1.8   {d2[]},[r3]            /* edge 0 uses pBeta[0] again */
    BNE      L0x38                  /* Z from the last ADDS: set after 8 */
    MOV      r0,#0                  /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

    .end