/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */



#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
        rsdIntrinsicConvolve3x3_K

        Applies nine signed 16-bit coefficients to three rows of 8-bit
        pixels and writes two output pixels (8 bytes) per iteration.

        r0 = dst
        r1 = y0 base pointer
        r2 = y1 base pointer
        r3 = y2 base pointer
        [sp]     = coeffs       (read at sp + 32 + 64 after the prologue)
        [sp + 4] = length / 2   (read at sp + 36 + 64 after the prologue)

        The loop is a do/while: it always runs at least once, so the
        count must be non-zero on entry.
        Each iteration reads 16 bytes from every row but advances the row
        pointers by only 8, so the last iteration reads 8 bytes past the
        final pair of pixels consumed.
*/

ENTRY(rsdIntrinsicConvolve3x3_K)
        push            {r4-r8, r10, r11, lr}   /* 8 core registers = 32 bytes */
        vpush           {q4-q7}                 /* 4 q registers    = 64 bytes */

        /* Get the coeffs pointer from the stack and load the
           coefficients in the q0, q1 NEON registers */
        ldr             r4, [sp, #32+64]
        vld1.16         {q0, q1}, [r4]

        /* Get count from the stack */
        ldr             r4, [sp, #36+64]

        /* Load the frequently used immediate in a register */
        mov             r5, #8

1:
        /* Load and post-increase the address by r5=#8 */
        vld1.8          {q13}, [r1], r5
        vld1.8          {q14}, [r2], r5
        vld1.8          {q15}, [r3], r5

        /* Signal memory for data that will be used in the loop after the next */
        PLD             (r1, r5)
        PLD             (r2, r5)
        PLD             (r3, r5)

        /* Widen every byte to 16 bits: one d register per 4-byte pixel */
        vmovl.u8        q2, d26
        vmovl.u8        q3, d27
        vmovl.u8        q4, d28
        vmovl.u8        q5, d29
        vmovl.u8        q6, d30
        vmovl.u8        q7, d31

/*
        The two pixel source array is
        d4,  d5,  d6,  d7
        d8,  d9,  d10, d11
        d12, d13, d14, d15
*/

        /* First output pixel: 3x3 window over columns 0..2 */
        vmull.s16       q8, d4, d0[0]
        vmlal.s16       q8, d5, d0[1]
        vmlal.s16       q8, d6, d0[2]
        vmlal.s16       q8, d8, d0[3]
        vmlal.s16       q8, d9, d1[0]
        vmlal.s16       q8, d10, d1[1]
        vmlal.s16       q8, d12, d1[2]
        vmlal.s16       q8, d13, d1[3]
        vmlal.s16       q8, d14, d2[0]

        /* Second output pixel: 3x3 window over columns 1..3 */
        vmull.s16       q9, d5, d0[0]
        vmlal.s16       q9, d6, d0[1]
        vmlal.s16       q9, d7, d0[2]
        vmlal.s16       q9, d9, d0[3]
        vmlal.s16       q9, d10, d1[0]
        vmlal.s16       q9, d11, d1[1]
        vmlal.s16       q9, d13, d1[2]
        vmlal.s16       q9, d14, d1[3]
        vmlal.s16       q9, d15, d2[0]

        /* Drop the low 8 bits of each 32-bit sum and narrow to 16 bits */
        vshrn.i32       d16, q8, #8
        vshrn.i32       d17, q9, #8

        /* Saturate to unsigned 8 bits and store both pixels */
        vqmovun.s16     d16, q8
        vst1.8          d16, [r0]!

        /* Are we done yet? */
        subs            r4, r4, #1
        bne             1b

        /* We're done, bye! */
        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicConvolve3x3_K)

/*
        rsdIntrinsicColorMatrix4x4_K

        Multiplies all four channels of each pixel by a 4x4 matrix of
        signed 16-bit coefficients, shifts the sums right by 8 and
        saturates the result to unsigned 8 bits.

        r0 = dst
        r1 = src
        r2 = matrix (16 halfwords, loaded into q2 and q3)
        r3 = length

        Every iteration consumes and produces 16 bytes (four 4-byte
        pixels) and decrements r3 by one, so r3 is presumably the pixel
        count / 4 -- confirm against the caller.  r3 must be non-zero.
*/
ENTRY(rsdIntrinsicColorMatrix4x4_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave four pixels: lane i of d0..d3 gets the four bytes of pixel i */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        vmovl.u8        q12, d0  /* R */
        vmovl.u8        q13, d1  /* G */
        vmovl.u8        q14, d2  /* B */
        vmovl.u8        q15, d3  /* A */

        /* q8..q11 accumulate the four output channels */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]
        vmull.s16       q11, d24, d4[3]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]
        vmlal.s16       q11, d26, d5[3]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]
        vmlal.s16       q11, d28, d6[3]

        vmlal.s16       q8,  d30, d7[0]
        vmlal.s16       q9,  d30, d7[1]
        vmlal.s16       q10, d30, d7[2]
        vmlal.s16       q11, d30, d7[3]

        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8
        vshrn.i32       d30, q11, #8

        /* Only lanes 0..3 of each result are stored below */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14
        vqmovun.s16     d3, q15

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix4x4_K)

/*
        rsdIntrinsicColorMatrix3x3_K

        Same scheme as the 4x4 variant, restricted to the first three
        channels.  The fourth channel (d3) is loaded and stored back
        unmodified.

        r0 = dst
        r1 = src
        r2 = matrix (16 halfwords are loaded; only d4[0..2], d5[0..2]
             and d6[0..2] are used)
        r3 = length

        Every iteration handles four pixels (16 bytes); r3 must be
        non-zero.
*/
ENTRY(rsdIntrinsicColorMatrix3x3_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]

        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8

        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix3x3_K)

/*
        rsdIntrinsicColorMatrixDot_K

        Computes a single weighted sum of the first three channels
        (coefficients d4[0], d5[0], d6[0]) and writes that value to all
        three of them.  The fourth channel (d3) is stored back unmodified.

        r0 = dst
        r1 = src
        r2 = matrix
        r3 = length

        Every iteration handles four pixels (16 bytes); r3 must be
        non-zero.
*/
ENTRY(rsdIntrinsicColorMatrixDot_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        vmull.s16       q8, d24, d4[0]
        vmlal.s16       q8, d26, d5[0]
        vmlal.s16       q8, d28, d6[0]
        vshrn.i32       d24, q8, #8
        vqmovun.s16     d0, q12
        /* Replicate the result into the second and third channels */
        vmov.u8         d1, d0
        vmov.u8         d2, d0

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrixDot_K)


/*
static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
                  const float* gPtr, int iradius, int x1, int x2)

        r0 = out
        r1 = pin
        r2 = stride
        r3 = gptr
        r4 = sp, ct
        r5 = sp+4, x1
        r6 = sp+8, x2

        For each pair of pixels from x1 up to x2, accumulates ct rows
        (stepping by stride bytes), each scaled by the next float read
        from gptr, and stores the two float4 sums to out.

        x1 advances by 2 and the outer loop ends only when x1 == x2, so
        x2 - x1 must be a positive even number.  ct must be non-zero.
*/
ENTRY(rsdIntrinsicBlurVF_K)
        push            {r4-r8, r10, r11, lr}   /* 32 bytes */
        vpush           {q4-q7}                 /* 64 bytes */

        ldr             r4, [sp, #32+64]
        ldr             r5, [sp, #32+64 + 4]
        ldr             r6, [sp, #32+64 + 8]

1:
        veor            q10, q10, q10         /* float4 blurredPixel = 0; */
        veor            q11, q11, q11         /* float4 blurredPixel = 0; */
        add             r7, r1, r5, lsl #2    /* const uchar *pi = ptrIn + x1 * 4; */
        mov             r10, r3               /* restart the weight pointer */

        mov             r11, r4               /* rows left for this pixel pair */

2:
        vld1.32         {d2}, [r7]            /* two 4-byte pixels */
        vmovl.u8        q1, d2                /* bytes -> 16 bits */
        vmovl.u16       q3, d2                /* first pixel  -> 32 bits */
        vmovl.u16       q4, d3                /* second pixel -> 32 bits */
        vcvt.f32.s32    q3, q3
        vcvt.f32.s32    q4, q4
        vld1.32         {d0[0]}, [r10]!       /* next weight */
        add             r7, r7, r2            /* next row */
        vmla.f32        q10, q3, d0[0]
        vmla.f32        q11, q4, d0[0]
        subs            r11, r11, #1
        bne             2b

        vst1.32         {q10}, [r0]!
        vst1.32         {q11}, [r0]!
        add             r5, r5, #2
        cmp             r5, r6
        bne             1b


        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurVF_K)

/*
static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
                  const float* gPtr, int iradius, int x1, int x2)

        NOTE(review): the prototype quoted above is the one given for
        rsdIntrinsicBlurVF_K; its parameter list (iStride, ...) does not
        match the register assignment below.  Confirm the real C
        counterpart of this routine against the caller.

        r0 = out
        r1 = pin
        r2 = gptr
        r3 = ct
        r4 = sp, x1
        r5 = sp+4, x2

        For each x from x1 up to x2, sums ct consecutive 16-byte float4
        elements starting at pin + x * 16, each scaled by the matching
        float from gptr, and stores the sum as four bytes to out.

        The first tap is peeled off and the inner loop then consumes two
        taps per pass until the counter reaches exactly zero, so ct must
        be odd and at least 3.  The outer loop ends only when x1 == x2.
*/
ENTRY(rsdIntrinsicBlurHF_K)
        push            {r4-r8, r10, r11, lr}   /* 32 bytes */
        vpush           {q4-q7}                 /* 64 bytes */

        ldr             r4, [sp, #32+64]
        ldr             r5, [sp, #32+64 + 4]

1:
        add             r7, r1, r4, lsl #4    /* pi = ptrIn + x1 * 16 bytes */
        mov             r10, r2
        mov             r11, r3

        /* First tap initialises the accumulator */
        vld1.32         {q1}, [r7]!
        vld1.32         {d6[0]}, [r10]!
        vmul.f32        q0, q1, d6[0]
        sub             r11, r11, #1

2:
        /* Two taps per pass */
        vld1.32         {q1}, [r7]!
        vld1.32         {q2}, [r7]!
        vld1.32         {d6[0]}, [r10]!
        vld1.32         {d6[1]}, [r10]!
        vmla.f32        q0, q1, d6[0]
        vmla.f32        q0, q2, d6[1]
        subs            r11, r11, #2
        bne             2b

        /* float4 -> int32 -> 16 bits -> 8 bits, then store one pixel */
        vcvt.s32.f32    q0, q0
        vmovn.u32       d0, q0
        vmovn.u16       d0, q0

        vst1.32         {d0[0]}, [r0]!
        add             r4, r4, #1
        cmp             r4, r5
        bne             1b

        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurHF_K)

/*
        r0 = dst
        r1 = Y
        r2 = VU
        r3 = length (pixels / 8)
        r4 = sp, params

        This function converts 8 pixels per iteration
*/
ENTRY(rsdIntrinsicYuv_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        ldr             r4, [sp, #32+64]
        vld1.16         {q2}, [r4]!           // mults
        vld1.16         {q3}, [r4]!           // y offset
        vld1.16         {q4}, [r4]!           // 128
        vdup.8          d3, d5[1]

1:
        vld1.8          {d10}, [r1]!
        vld1.8          {d12}, [r2]!
        vmovl.u8        q5, d10               // Y at .16
        vmovl.u8        q6, d12               // vu at .16

        vsub.i16        q5, q5, q3
        vsub.i16        q6, q6, q4
        vtrn.16         d12, d13              // d12 = u, d13 = v
        vmov            q7, q6
        vtrn.16         d12, d14
        vtrn.32         d12, d14
        vtrn.16         d13, d15
        vtrn.32         d13, d15

        vmull.s16       q8, d10, d4[0]
        vmull.s16       q11, d11, d4[0]
        vmov            q9, q8
        vmov            q10, q8
        vmov            q12, q11
        vmov            q13, q11

        vmlal.s16       q8,  d12, d4[1]
        vmlal.s16       q9,  d12, d5[0]
        vmlal.s16       q10, d13, d4[3]
        vmlal.s16       q9,  d13, d4[2]
429ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 430ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q11, d14, d4[1] 431ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q12, d14, d5[0] 432ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q13, d15, d4[3] 433ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q12, d15, d4[2] 434ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 435ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 436ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d16, q8, #8 437ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d18, q9, #8 438ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d20, q10, #8 439ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d0, q8 440ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d1, q9 441ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d2, q10 442ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 443ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 444ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 445ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 446ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 447ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d16, q11, #8 448ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d18, q12, #8 449ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d20, q13, #8 450ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d0, q8 451ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d1, q9 452ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d2, q10 453ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 454ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 
455ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 456ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 457ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 458ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r3, r3, #1 459ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 460ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 461ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 462ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams pop {r4-r8, r10, r11, lr} 463ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 464ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicYuv_K) 465ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 466ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* Convolve 5x5 */ 467ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 468ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 469ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 470ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = y0 base pointer 471ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = y1 base pointer 472ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r3 = y2 base pointer 473ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r4 = y3 base pointer 474ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r5 = y4 base pointer 475ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r6 = coeffs 476ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r7 = length 477ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 478ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicConvolve5x5_K) 479ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams push {r4-r7, lr} 480ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 481ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 482ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* load y3 in r4 */ 483ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldr r4, [sp, #20 + 
64] 484ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 485ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* load y4 in r5 */ 486ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldr r5, [sp, #24 + 64] 487ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 488ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Load the coefficients pointer */ 489ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldr r6, [sp, #28 + 64] 490ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 491ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Create the coefficients vector */ 492ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.16 {d0, d1, d2, d3}, [r6]! 493ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.16 {d4, d5, d6}, [r6] 494ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 495ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* load the count */ 496ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldr r6, [sp, #32 + 64] 497ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 498ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Load the frequently used immediate in a register */ 499ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r7, #8 500ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 501ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 502ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 503ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.8 {d24, d25, d26}, [r1], r7 @ y0 ( y - 2 ) 504ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.8 {d27, d28, d29}, [r2], r7 @ y0 ( y - 1 ) 505ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 506ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Signal memory for data that will be used in the loop after the next */ 507ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams PLD (r1, r7) 508ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams PLD (r2, r7) 509ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 
510ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Promoting the 8bit channels to 16bit */ 511ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d24 512ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d25 513ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d26 514ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d27 515ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d28 516ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q14, d29 517ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 518ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 519ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d18, d19, d20, d21, d22, d23, 520ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d24, d25 521ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 522ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmull.s16 q4, d18, d0[0] 523ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d19, d0[1] 524ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d20, d0[2] 525ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d21, d0[3] 526ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d22, d1[0] 527ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 528ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d24, d1[1] 529ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d25, d1[2] 530ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d26, d1[3] 531ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d27, d2[0] 532ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d28, d2[1] 533ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 534ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmull.s16 q5, d19, d0[0] 535ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d20, d0[1] 536ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d21, d0[2] 
537ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d22, d0[3] 538ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d23, d1[0] 539ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 540ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d25, d1[1] 541ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d26, d1[2] 542ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d27, d1[3] 543ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d28, d2[0] 544ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d29, d2[1] 545ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 546ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 547ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Next 2 rows */ 548ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 549ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.8 {d24, d25, d26}, [r3], r7 @ y0 ( y ) 550ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.8 {d27, d28, d29}, [r4], r7 @ y0 ( y + 1 ) 551ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 552ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Signal memory for data that will be used in the loop after the next */ 553ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams PLD (r3, r7) 554ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams PLD (r4, r7) 555ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 556ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Promoting the 8bit channels to 16bit */ 557ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d24 558ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d25 559ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d26 560ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d27 561ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d28 562ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 
vmovl.u8 q14, d29 563ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 564ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 565ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d18, d19, d20, d21, d22, d23, 566ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d24, d25 567ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 568ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d18, d2[2] 569ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d19, d2[3] 570ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d20, d3[0] 571ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d21, d3[1] 572ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d22, d3[2] 573ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 574ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d24, d3[3] 575ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d25, d4[0] 576ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d26, d4[1] 577ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d27, d4[2] 578ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d28, d4[3] 579ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 580ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d19, d2[2] 581ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d20, d2[3] 582ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d21, d3[0] 583ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d22, d3[1] 584ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d23, d3[2] 585ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 586ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d25, d3[3] 587ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d26, d4[0] 588ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d27, d4[1] 589ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d28, d4[2] 
590ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d29, d4[3] 591ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 592ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Last row */ 593ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 594ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld1.8 {d24, d25, d26}, [r5], r7 @ y0 ( y + 2 ) 595ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 596ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Signal memory for data that will be used in the loop after the next */ 597ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams PLD (r5, r7) 598ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 599ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Promoting the 8bit channels to 16bit */ 600ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d24 601ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d25 602ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d26 603ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 604ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 605ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d18, d19, d20, d21, d22, d23, 606ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams d24, d25 607ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 608ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 609ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d18, d5[0] 610ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d19, d5[1] 611ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d20, d5[2] 612ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d21, d5[3] 613ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q4, d22, d6[0] 614ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 615ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d19, d5[0] 616ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason 
Sams vmlal.s16 q5, d20, d5[1] 617ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d21, d5[2] 618ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d22, d5[3] 619ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmlal.s16 q5, d23, d6[0] 620ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 621ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 622ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 623ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 624ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* Narrow it to a d-reg 32 -> 16 bit */ 625ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d8, q4, #8 626ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i32 d9, q5, #8 627ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 628ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */ 629ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d8, q4 630ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 631ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst1.8 d8, [r0]! @ return the output and increase the address of r0 632ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 633ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Are we done? 
*/ 634ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r6, r6, #1 635ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 636ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 637ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* Yup, bye */ 638ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 639ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams pop {r4-r7, lr} 640ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 641ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 642ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicConvolve5x5_K) 643ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 644ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 645ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 646ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 647ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 648ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams dst = src + dst * (1.0 - src.a) 649ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 650ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 651ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = src 652ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = length 653ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 654ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendSrcOver_K) 655ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams .save {r4, lr} 656ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams stmfd sp!, {r4, lr} 657ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 658ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 659ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, #255 660ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vdup.16 q7, r4 661ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 662ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, r0 663ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 664ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 
665ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* src */ 666ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 667ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 668ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 669ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 670ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 671ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 672ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 673ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 674ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q12, d0, #8 675ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q13, d1, #8 676ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q14, d2, #8 677ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q6, d3 678ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q6, q7, q6 // q6 = 1 - src.a 679ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q15, d3, #8 680ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 681ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* dst */ 682ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 683ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 684ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 685ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 686ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 687ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 
688ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 689ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 690ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q8, d0 691ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d1 692ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d2 693ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d3 694ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 695ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q12, q8, q6 696ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q13, q9, q6 697ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q14, q10, q6 698ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q15, q11, q6 699ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 700ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d0, q12, #8 701ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d1, q13, #8 702ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d2, q14, #8 703ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d3, q15, #8 704ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 705ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 706ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 707ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 708ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 709ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 710ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 711ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
712ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 713ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r2, r2, #1 714ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 715ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 716ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 717ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldmfd sp!, {r4, lr} 718ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 719ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendSrcOver_K) 720ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 721ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 722ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams dst = dst + src * (1.0 - dst.a) 723ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 724ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 725ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = src 726ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = length 727ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 728ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendDstOver_K) 729ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams .save {r4, lr} 730ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams stmfd sp!, {r4, lr} 731ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 732ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 733ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, #255 734ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vdup.16 q7, r4 735ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 736ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, r0 737ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 738ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 739ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* src */ 740ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 741ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 
742ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 743ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 744ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 745ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 746ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 747ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 748ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d0 749ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d1 750ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q14, d2 751ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q15, d3 752ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 753ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* dst */ 754ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 755ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 756ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 757ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 758ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 759ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 760ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 761ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
762ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q8, d0, #8 763ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q9, d1, #8 764ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q10, d2, #8 765ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q6, d3 766ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q6, q7, q6 // q6 = 1 - dst.a 767ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshll.u8 q11, d3, #8 768ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 769ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 770ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q8, q12, q6 771ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q9, q13, q6 772ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q10, q14, q6 773ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmla.i16 q11, q15, q6 774ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 775ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d0, q8, #8 776ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d1, q9, #8 777ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d2, q10, #8 778ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d3, q11, #8 779ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 780ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 781ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 782ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 783ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 784ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 785ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 786ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
/* Loop tail and epilogue of rsdIntrinsicBlendDstOver_K. */
        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstOver_K)

/*
        dst = src * dst.a

        Every channel of src (alpha included) is multiplied by the dst alpha
        of the same pixel; the 16-bit product is scaled back with >> 8.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process (each loop pass consumes
             32 bytes from r0 and from r1); 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendSrcIn_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src: 8 pixels, one channel per D register, widened to 16 bits */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst: only the alpha channel (d3) is used */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q11, d3                 // q11 = dst.a

        vmul.i16    q12, q12, q11
        vmul.i16    q13, q13, q11
        vmul.i16    q14, q14, q11
        vmul.i16    q15, q15, q11

        /* narrow back to 8 bits, keeping the high byte of each product */
        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcIn_K)

/*
        dst = dst * src.a

        Every channel of dst (alpha included) is multiplied by the src alpha
        of the same pixel; the 16-bit product is scaled back with >> 8.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process; 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendDstIn_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src: only the alpha channel (d3) is used */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q15, d3                 // q15 = src.a

        /* dst: 8 pixels, one channel per D register, widened to 16 bits */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3

        vmul.i16    q8, q8, q15
        vmul.i16    q9, q9, q15
        vmul.i16    q10, q10, q15
        vmul.i16    q11, q11, q15

        vshrn.i16   d0, q8, #8
        vshrn.i16   d1, q9, #8
        vshrn.i16   d2, q10, #8
        vshrn.i16   d3, q11, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstIn_K)



/*
        dst = src * (1.0 - dst.a)

        "1.0 - dst.a" is computed as 255 - dst.a on 16-bit lanes; the
        product is scaled back with >> 8.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process; 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendSrcOut_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}                 // q6/q7 are used below

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, #255
        vdup.16     q7, r4                  // q7 = 255 in every 16-bit lane

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst: only the alpha channel (d3) is used */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q11, d3                 // q11 = dst.a

        vsub.i16    q6, q7, q11             // q6 = 255 - dst.a
        vmul.i16    q12, q12, q6
        vmul.i16    q13, q13, q6
        vmul.i16    q14, q14, q6
        vmul.i16    q15, q15, q6

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcOut_K)


/*
        dst = dst * (1.0 - src.a)

        "1.0 - src.a" is computed as 255 - src.a on 16-bit lanes; the
        product is scaled back with >> 8.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process; 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendDstOut_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}                 // q6/q7 are used below

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, #255
        vdup.16     q7, r4                  // q7 = 255 in every 16-bit lane

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src: only the alpha channel (d3) is used */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q15, d3                 // q15 = src.a

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3

        vsub.i16    q6, q7, q15             // q6 = 255 - src.a
        vmul.i16    q12, q8, q6
        vmul.i16    q13, q9, q6
        vmul.i16    q14, q10, q6
        vmul.i16    q15, q11, q6            // src.a no longer needed; q15 reused for the result

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstOut_K)


/*
        dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
        dst.a = dst.a

        Both products are accumulated in 16-bit lanes before the >> 8.
        NOTE(review): the sum only fits in 16 bits if each colour channel
        is <= its own alpha (premultiplied input) - confirm with callers.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process; 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendSrcAtop_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}                 // q6/q7 are used below

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, #255
        vdup.16     q7, r4                  // q7 = 255 in every 16-bit lane

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3

        vsub.i16    q6, q7, q15             // q6 = 255 - src.a
        vmul.i16    q8, q8, q6              // dst.rgb * (255 - src.a)
        vmul.i16    q9, q9, q6
        vmul.i16    q10, q10, q6

        vmla.i16    q8, q12, q11            // += src.rgb * dst.a
        vmla.i16    q9, q13, q11
        vmla.i16    q10, q14, q11

        vshrn.i16   d0, q8, #8
        vshrn.i16   d1, q9, #8
        vshrn.i16   d2, q10, #8
        /* d3 is left untouched: it still holds the dst alpha just loaded */
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcAtop_K)

/*
        dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
        dst.a = src.a

        Both products are accumulated in 16-bit lanes before the >> 8.
        NOTE(review): the sum only fits in 16 bits if each colour channel
        is <= its own alpha (premultiplied input) - confirm with callers.

        r0 = dst (4 bytes per pixel, alpha is byte 3; updated in place)
        r1 = src (same layout)
        r2 = number of 8-pixel groups to process; 0 is a no-op
*/
ENTRY(rsdIntrinsicBlendDstAtop_K)
        .save       {r4, lr}
        stmfd       sp!, {r4, lr}
        vpush       {q4-q7}                 // q6/q7 are used below

        cmp         r2, #0                  // nothing to do: avoid wrapping the counter
        beq         2f

        mov         r4, #255
        vdup.16     q7, r4                  // q7 = 255 in every 16-bit lane

        mov         r4, r0                  // r4 = store cursor, r0 = dst load cursor
1:
        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3                 // q15 = src.a, kept intact until the store

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3

        vsub.i16    q6, q7, q11             // q6 = 255 - dst.a
        vmul.i16    q12, q12, q6            // src.rgb * (255 - dst.a)
        vmul.i16    q13, q13, q6
        vmul.i16    q14, q14, q6

        vmla.i16    q12, q8, q15            // += dst.rgb * src.a
        vmla.i16    q13, q9, q15
        vmla.i16    q14, q10, q15

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        /* Result alpha is the source alpha. d3 currently holds the dst
           alpha from the load above, so narrow the saved src.a back in. */
        vmovn.i16   d3, q15
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b
2:
        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstAtop_K)

/*
        dst = dst ^ src

        r0 = dst
        r1 = src
        r2 = length
*/
ENTRY(rsdIntrinsicBlendXor_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        mov             r4, #255
        vdup.16         q7, r4

        mov     r4, r0
1:

        /* src */
        vld4.8      {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8      {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8      {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8      {d0[3],d1[3],d2[3],d3[3]}, [r1]!
        vld4.8      {d0[4],d1[4],d2[4],d3[4]}, [r1]!
        vld4.8      {d0[5],d1[5],d2[5],d3[5]}, [r1]!
        vld4.8      {d0[6],d1[6],d2[6],d3[6]}, [r1]!
        vld4.8      {d0[7],d1[7],d2[7],d3[7]}, [r1]!
        vmov.u8     d4, d0
        vmov.u8     d5, d1
        vmov.u8     d6, d2
        vmov.u8     d7, d3

        /* dst */
        vld4.8      {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vld4.8      {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vld4.8      {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vld4.8      {d0[3],d1[3],d2[3],d3[3]}, [r0]!
        vld4.8      {d0[4],d1[4],d2[4],d3[4]}, [r0]!
        vld4.8      {d0[5],d1[5],d2[5],d3[5]}, [r0]!
        vld4.8      {d0[6],d1[6],d2[6],d3[6]}, [r0]!
        vld4.8      {d0[7],d1[7],d2[7],d3[7]}, [r0]!

        veor        d0, d0, d4
        veor        d1, d1, d5
        veor        d2, d2, d6
        veor        d3, d3, d7

        vst4.8      {d0[0],d1[0],d2[0],d3[0]}, [r4]!
        vst4.8      {d0[1],d1[1],d2[1],d3[1]}, [r4]!
        vst4.8      {d0[2],d1[2],d2[2],d3[2]}, [r4]!
        vst4.8      {d0[3],d1[3],d2[3],d3[3]}, [r4]!
        vst4.8      {d0[4],d1[4],d2[4],d3[4]}, [r4]!
        vst4.8      {d0[5],d1[5],d2[5],d3[5]}, [r4]!
        vst4.8      {d0[6],d1[6],d2[6],d3[6]}, [r4]!
        vst4.8      {d0[7],d1[7],d2[7],d3[7]}, [r4]!
1299ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1300ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r2, r2, #1 1301ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 1302ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1303ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 1304ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldmfd sp!, {r4, lr} 1305ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 1306ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendXor_K) 1307ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1308ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 1309ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams dst = dst * src 1310ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1311ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 1312ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = src 1313ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = length 1314ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 1315ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendMultiply_K) 1316ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams .save {r4, lr} 1317ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams stmfd sp!, {r4, lr} 1318ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 1319ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1320ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, #255 1321ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vdup.16 q7, r4 1322ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1323ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, r0 1324ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 1325ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1326ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* src */ 1327ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1328ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1329ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1330ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1331ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1332ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1333ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1334ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1335ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d0 1336ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d1 1337ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q14, d2 1338ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q15, d3 1339ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1340ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* dst */ 1341ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1342ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1343ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1344ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1345ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1346ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1347ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1348ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1349ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q8, d0 1350ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d1 1351ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d2 1352ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d3 1353ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1354ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1355ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmul.i16 q8, q8, q12 1356ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmul.i16 q9, q9, q13 1357ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmul.i16 q10, q10, q14 1358ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmul.i16 q11, q11, q15 1359ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1360ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d0, q8, #8 1361ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d1, q9, #8 1362ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d2, q10, #8 1363ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vshrn.i16 d3, q11, #8 1364ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1365ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1366ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1367ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1368ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1369ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1370ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1371ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1372ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1373ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r2, r2, #1 1374ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 1375ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1376ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 1377ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldmfd sp!, {r4, lr} 1378ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 1379ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendMultiply_K) 1380ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1381ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 1382ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams dst = min(src + dst, 1.0) 1383ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1384ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 1385ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = src 1386ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = length 1387ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 1388ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendAdd_K) 1389ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams .save {r4, lr} 1390ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams stmfd sp!, {r4, lr} 1391ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 1392ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1393ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, #255 1394ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vdup.16 q7, r4 1395ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1396ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, r0 1397ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 1398ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1399ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* src */ 1400ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1401ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1402ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1403ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1404ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1405ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1406ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1407ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1408ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d0 1409ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d1 1410ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q14, d2 1411ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q15, d3 1412ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1413ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* dst */ 1414ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1415ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1416ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1417ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1418ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1419ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1420ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1421ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1422ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q8, d0 1423ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d1 1424ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d2 1425ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d3 1426ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1427ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1428ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vadd.i16 q8, q8, q12 1429ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vadd.i16 q9, q9, q13 1430ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vadd.i16 q10, q10, q14 1431ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vadd.i16 q11, q11, q15 1432ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1433ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d0, q8 1434ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d1, q9 1435ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d2, q10 1436ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d3, q11 1437ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1438ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1439ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1440ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1441ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1442ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1443ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1444ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1445ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1446ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r2, r2, #1 1447ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 1448ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1449ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 1450ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldmfd sp!, {r4, lr} 1451ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 1452ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendAdd_K) 1453ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1454ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1455ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* 1456ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams dst = max(dst - src, 0.0) 1457ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1458ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r0 = dst 1459ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r1 = src 1460ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams r2 = length 1461ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/ 1462ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendSub_K) 1463ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams .save {r4, lr} 1464ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams stmfd sp!, {r4, lr} 1465ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpush {q4-q7} 1466ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1467ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, #255 1468ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vdup.16 q7, r4 1469ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1470ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams mov r4, r0 1471ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1: 1472ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1473ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* src */ 1474ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1475ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1476ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1477ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1478ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1479ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1480ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1481ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1482ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q12, d0 1483ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q13, d1 1484ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q14, d2 1485ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q15, d3 1486ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1487ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams /* dst */ 1488ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1489ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1490ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1491ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1492ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1493ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1494ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1495ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1496ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q8, d0 1497ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q9, d1 1498ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q10, d2 1499ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vmovl.u8 q11, d3 1500ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1501ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1502ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q8, q8, q12 1503ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q9, q9, q13 1504ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q10, q10, q14 1505ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vsub.i16 q11, q11, q15 1506ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1507ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d0, q8 1508ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d1, q9 1509ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d2, q10 1510ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vqmovun.s16 d3, q11 1511ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1512ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1513ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1514ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1515ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1516ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1517ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1518ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1519ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1520ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams subs r2, r2, #1 1521ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bne 1b 1522ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1523ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams vpop {q4-q7} 1524ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams ldmfd sp!, {r4, lr} 1525ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams bx lr 1526ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendSub_K) 1527ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams 1528