/* omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

/*
 * Offsets of the two stack-passed arguments once the prologue has run.
 * PUSH {r4-r12,lr} stores 10 words (0x28 bytes) and VPUSH {d8-d15} stores
 * 8 doublewords (0x40 bytes), so the first stack argument is at sp+0x68
 * and the second one at sp+0x6c.
 */
    .equ    LUMA_HOREDGE_ARG5_OFFSET, 0x68
    .equ    LUMA_HOREDGE_ARG6_OFFSET, 0x6c

/*
 * Initial value of the block counter kept in r9. The top byte is 01010101b
 * and every processed block doubles the register with ADDS, so the bit that
 * falls out of bit 31 lands in the carry flag:
 *   - carry clear after blocks 1, 3, 5, 7 : stay on the same edge
 *   - carry set   after blocks 2, 4, 6, 8 : the edge is finished
 * The register only becomes zero on the 8th doubling, so the Z flag of that
 * ADDS terminates the whole loop.
 */
    .equ    LUMA_HOREDGE_BLOCK_CTRL, 0x55000000

/*
 * omxVCM4P10_FilterDeblockingLuma_HorEdge_I
 *
 * Walks four horizontal edges that are 4 rows apart. Each edge is handled
 * as two blocks of 8 columns (16 columns in total) and the filtered rows
 * are written back over the input.
 *
 * Naming used in the comments below: for the edge being processed, p0..p3
 * are the rows directly above it (p0 nearest) and q0..q3 the rows directly
 * below it (q0 nearest). On entry r0 addresses q0 of the first edge.
 *
 * Inputs, as consumed by this code:
 *   r0          address of row q0 of the first edge, column 0
 *   r1          distance in bytes between consecutive rows
 *   r2          two threshold bytes compared against |q0 - p0|; byte 0 is
 *               used for the first edge, byte 1 for the other three
 *   r3          two threshold bytes compared against the remaining pixel
 *               differences; same byte 0 / byte 1 split as r2
 *   [sp, #0]    fifth argument, kept in r5. It is advanced by 2 here for
 *               skipped and strong-filtered blocks. It is not read in this
 *               routine; presumably the per-block clipping table consumed
 *               (and advanced) by the SLT4 helper - TODO confirm.
 *   [sp, #4]    sixth argument, kept in r4: one strength byte per group of
 *               4 columns, read as one halfword per 8-column block.
 *
 * Result: r0 = 0.
 *
 * Registers prepared for the helper routines (their contracts are defined
 * outside this file, so the following is an assumption to be confirmed):
 *   d4-d11 pixel rows, d12/d16/d17 masks, d14 = 0 and d15 = 1 per byte;
 *   helper results are taken from d24, d25, d29, d30 (plus d28 and d31 on
 *   the strong path). The store sequences below rely on the helpers leaving
 *   r0, r1, r4 and r6-r10 intact.
 */
    .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I
    .func   omxVCM4P10_FilterDeblockingLuma_HorEdge_I
omxVCM4P10_FilterDeblockingLuma_HorEdge_I:
    push        {r4-r12,lr}
    vpush       {d8-d15}
    add         r7,r2,#1                @ r7 -> second byte of the r2 pair
    add         r8,r3,#1                @ r8 -> second byte of the r3 pair
    vld1.8      {d0[]},[r2]             @ d0 = first r2 byte in every lane
    sub         r0,r0,r1,lsl #2         @ r0 = row p3 of the first edge
    vld1.8      {d2[]},[r3]             @ d2 = first r3 byte in every lane
    ldr         r4,[sp,#LUMA_HOREDGE_ARG6_OFFSET]
    ldr         r5,[sp,#LUMA_HOREDGE_ARG5_OFFSET]
    mov         r11,#0                  @ zero source for clearing mask halves
    vmov.i8     d14,#0
    vmov.i8     d15,#0x1
    add         r10,r1,r1               @ r10 = two rows
    mov         r9,#LUMA_HOREDGE_BLOCK_CTRL

    /* ---- one 8-column block per iteration ---- */
.Lnext_block:
    ldrh        r12,[r4],#2             @ two strength bytes for this block
    add         r6,r0,r1                @ r0 walks p3/p1/q0/q2, r6 walks p2/p0/q1
    cmp         r12,#0
    beq         .Lskip_block            @ both strengths zero: nothing to filter

    vld1.8      {d7},[r0],r10           @ p3
    vld1.8      {d6},[r6],r10           @ p2
    vld1.8      {d5},[r0],r10           @ p1
    vld1.8      {d4},[r6],r10           @ p0
    vld1.8      {d8},[r0],r10           @ q0
    vabd.u8     d12,d4,d5               @ |p0 - p1|
    vld1.8      {d9},[r6]               @ q1
    vabd.u8     d13,d8,d4               @ |q0 - p0|
    vld1.8      {d10},[r0],r1           @ q2
    vabd.u8     d18,d9,d8               @ |q1 - q0|
    vabd.u8     d19,d6,d4               @ |p2 - p0|
    vcgt.u8     d16,d0,d13              @ d0 > |q0 - p0|
    tst         r12,#0xff               @ strength of columns 0-3 ...
    vmax.u8     d12,d18,d12             @ max(|q1 - q0|, |p0 - p1|)
    vabd.u8     d17,d10,d8              @ |q2 - q0|
    vmoveq.32   d16[0],r11              @ ... is zero: clear mask lanes 0-3
    tst         r12,#0xff00             @ strength of columns 4-7 ...
    vcgt.u8     d19,d2,d19              @ d2 > |p2 - p0|
    vcgt.u8     d12,d2,d12              @ d2 > both near-edge differences
    vmoveq.32   d16[1],r11              @ ... is zero: clear mask lanes 4-7
    vcgt.u8     d17,d2,d17              @ d2 > |q2 - q0|
    vld1.8      {d11},[r0]              @ q3 (r0 stays on row q3)
    vand        d16,d16,d12             @ d16 = filter-enable mask
    tst         r12,#4                  @ bit 2 of the low strength byte
    vand        d12,d16,d17             @ enable AND (d2 > |q2 - q0|)
    vand        d17,d16,d19             @ enable AND (d2 > |p2 - p0|)
    bne         .Lstrong_filter         @ bit set: take the SGE4 path

    /* ---- bit 2 clear: SLT4 helper, rows p1..q1 are rewritten ---- */
    sub         r0,r0,r1,lsl #2
    sub         r0,r0,r1                @ q3 minus 5 rows = p1
    bl          armVCM4P10_DeblockingLumabSLT4_unsafe
    vst1.8      {d30},[r0],r1           @ p1
    vst1.8      {d29},[r0],r1           @ p0
    sub         r6,r0,r1,lsl #2         @ r0 is on q0, so r6 = p3
    vst1.8      {d24},[r0],r1           @ q0
    adds        r9,r9,r9                @ step the block counter, sets C and Z
    vst1.8      {d25},[r0]              @ q1
    add         r0,r6,#8                @ p3 of the next 8 columns
    bcc         .Lnext_block            @ carry clear: second half of this edge
    b           .Ledge_done

    /* ---- both strengths zero: step over the block ---- */
.Lskip_block:
    add         r0,r0,#8                @ next 8 columns
    adds        r9,r9,r9                @ step the block counter, sets C and Z
    add         r5,r5,#2                @ advance the r5 table past this block
    bcc         .Lnext_block
    b           .Ledge_done

    /* ---- bit 2 set: SGE4 helper, rows p2..q2 are rewritten ---- */
.Lstrong_filter:
    sub         r0,r0,r1,lsl #2
    sub         r0,r0,r1,lsl #1         @ q3 minus 6 rows = p2
    bl          armVCM4P10_DeblockingLumabSGE4_unsafe
    vst1.8      {d31},[r0],r1           @ p2
    vst1.8      {d30},[r0],r1           @ p1
    vst1.8      {d29},[r0],r1           @ p0
    sub         r6,r0,r1,lsl #2         @ r0 is on q0, so r6 = p3
    vst1.8      {d24},[r0],r1           @ q0
    adds        r9,r9,r9                @ step the block counter, sets C and Z
    vst1.8      {d25},[r0],r1           @ q1
    add         r5,r5,#2                @ advance the r5 table past this block
    vst1.8      {d28},[r0]              @ q2
    add         r0,r6,#8                @ p3 of the next 8 columns
    bcc         .Lnext_block
    /* carry set: fall through, this edge is complete */

    /* ---- both halves of an edge done: move to the edge 4 rows below ---- */
.Ledge_done:
    sub         r0,r0,#0x10             @ back to column 0
    vld1.8      {d0[]},[r7]             @ later edges use the second r2 byte
    add         r0,r0,r1,lsl #2         @ four rows down
    vld1.8      {d2[]},[r8]             @ later edges use the second r3 byte
    bne         .Lnext_block            @ Z comes from the last ADDS on r9

    mov         r0,#0                   @ return value
    vpop        {d8-d15}
    pop         {r4-r12,pc}
    .endfunc

    .end