/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Target/syntax: 32-bit ARM with NEON, GNU as syntax, run through the C
 * preprocessor.  ENTRY, END and PLD are macros expected to come from the
 * headers included below.
 *
 * Several routines share the same prologue: eight core registers (32 bytes)
 * followed by q4-q7 (64 bytes) are pushed, so the caller's first stack
 * argument is found at [sp, #32+64] after the prologue.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
        3x3 convolution producing 8 output bytes per loop iteration.

        r0 = dst
        r1 = y0 base pointer
        r2 = y1 base pointer
        r3 = y2 base pointer
        sp   = coeffs (pointer to 16-bit coefficients; 16 are loaded,
               the first 9 are used)
        sp+4 = length / 2 (loop count; must be non-zero, the loop is
               a do-while and a count of 0 would wrap around)

        Each iteration reads 16 bytes from every row but only advances
        the row pointers by 8, so 8 bytes past the last consumed
        position of each row are read.
*/

ENTRY(rsdIntrinsicConvolve3x3_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        /* Get the coeffs pointer from the stack and load the
           coefficients in the q0, q1 NEON registers */
        ldr             r4, [sp, #32+64]
        vld1.16         {q0, q1}, [r4]

        /* Get count from the stack */
        ldr             r4, [sp, #36+64]

        /* Load the frequently used immediate in a register */
        mov             r5, #8

1:
        /* Load and post-increase the address by r5=#8 */
        vld1.8          {q13}, [r1], r5
        vld1.8          {q14}, [r2], r5
        vld1.8          {q15}, [r3], r5

        /* Signal memory for data that will be used in the loop after the next */
        PLD             (r1, r5)
        PLD             (r2, r5)
        PLD             (r3, r5)

        /* Widen the three rows from u8 to 16-bit lanes */
        vmovl.u8        q2, d26
        vmovl.u8        q3, d27
        vmovl.u8        q4, d28
        vmovl.u8        q5, d29
        vmovl.u8        q6, d30
        vmovl.u8        q7, d31

/*
        The two pixel source array is
        d4,  d5,  d6,  d7
        d8,  d9,  d10, d11
        d12, d13, d14, d15
*/

        /* First output: columns 0..2 of each row times coefficients 0..8 */
        vmull.s16       q8, d4, d0[0]
        vmlal.s16       q8, d5, d0[1]
        vmlal.s16       q8, d6, d0[2]
        vmlal.s16       q8, d8, d0[3]
        vmlal.s16       q8, d9, d1[0]
        vmlal.s16       q8, d10, d1[1]
        vmlal.s16       q8, d12, d1[2]
        vmlal.s16       q8, d13, d1[3]
        vmlal.s16       q8, d14, d2[0]

        /* Second output: same coefficients, window shifted by one column */
        vmull.s16       q9, d5, d0[0]
        vmlal.s16       q9, d6, d0[1]
        vmlal.s16       q9, d7, d0[2]
        vmlal.s16       q9, d9, d0[3]
        vmlal.s16       q9, d10, d1[0]
        vmlal.s16       q9, d11, d1[1]
        vmlal.s16       q9, d13, d1[2]
        vmlal.s16       q9, d14, d1[3]
        vmlal.s16       q9, d15, d2[0]

        /* Drop the 8 fractional bits and narrow 32 -> 16 */
        vshrn.i32       d16, q8, #8
        vshrn.i32       d17, q9, #8

        /* Saturate signed 16 -> unsigned 8 and store the 8 result bytes */
        vqmovun.s16     d16, q8
        vst1.8          d16, [r0]!

        /* Are we done yet? */
        subs            r4, r4, #1
        bne             1b

        /* We're done, bye! */
        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicConvolve3x3_K)

/*
        4x4 colour matrix applied to interleaved 4-byte pixels.

        r0 = dst
        r1 = src
        r2 = matrix (16 x 16-bit coefficients, loaded into d4-d7)
        r3 = length (loop count; each iteration consumes and produces
             4 pixels = 16 bytes; must be non-zero)

        Per channel c (0..3):
            out[c] = sat_u8((in0*d4[c] + in1*d5[c] + in2*d6[c] + in3*d7[c]) >> 8)

        q4-q7 are saved and restored although this routine does not
        write them.
*/
ENTRY(rsdIntrinsicColorMatrix4x4_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave 4 pixels: lane n of d0..d3 holds channel 0..3 of pixel n */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen to 16 bit; only the low halves (d24/d26/d28/d30) are used */
        vmovl.u8        q12, d0  /* R */
        vmovl.u8        q13, d1  /* G */
        vmovl.u8        q14, d2  /* B */
        vmovl.u8        q15, d3  /* A */

        vmull.s16       q8, d24, d4[0]
        vmull.s16       q9, d24, d4[1]
        vmull.s16       q10, d24, d4[2]
        vmull.s16       q11, d24, d4[3]

        vmlal.s16       q8, d26, d5[0]
        vmlal.s16       q9, d26, d5[1]
        vmlal.s16       q10, d26, d5[2]
        vmlal.s16       q11, d26, d5[3]

        vmlal.s16       q8, d28, d6[0]
        vmlal.s16       q9, d28, d6[1]
        vmlal.s16       q10, d28, d6[2]
        vmlal.s16       q11, d28, d6[3]

        vmlal.s16       q8, d30, d7[0]
        vmlal.s16       q9, d30, d7[1]
        vmlal.s16       q10, d30, d7[2]
        vmlal.s16       q11, d30, d7[3]

        /* Drop the 8 fractional bits, results land in the low halves of q12-q15 */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8
        vshrn.i32       d30, q11, #8

        /* Saturate to u8; only lanes 0..3 of d0-d3 are meaningful and stored */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14
        vqmovun.s16     d3, q15

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix4x4_K)

/*
        3x3 colour matrix applied to interleaved 4-byte pixels; the
        fourth byte of every pixel (d3) is stored back unchanged.

        r0 = dst
        r1 = src
        r2 = matrix (two q registers of 16-bit coefficients are loaded;
             lanes [0..2] of d4, d5 and d6 are used)
        r3 = length (loop count; each iteration consumes and produces
             4 pixels = 16 bytes; must be non-zero)

        Per channel c (0..2):
            out[c] = sat_u8((in0*d4[c] + in1*d5[c] + in2*d6[c]) >> 8)
*/
ENTRY(rsdIntrinsicColorMatrix3x3_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave 4 pixels: lane n of d0..d3 holds channel 0..3 of pixel n */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen the three colour channels; d3 is left as loaded */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        vmull.s16       q8, d24, d4[0]
        vmull.s16       q9, d24, d4[1]
        vmull.s16       q10, d24, d4[2]

        vmlal.s16       q8, d26, d5[0]
        vmlal.s16       q9, d26, d5[1]
        vmlal.s16       q10, d26, d5[2]

        vmlal.s16       q8, d28, d6[0]
        vmlal.s16       q9, d28, d6[1]
        vmlal.s16       q10, d28, d6[2]

        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8

        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix3x3_K)

/*
        Dot-product colour matrix: one weighted sum of the first three
        channels is written to all three of them; the fourth byte of
        every pixel (d3) is stored back unchanged.

        r0 = dst
        r1 = src
        r2 = matrix (two q registers of 16-bit coefficients are loaded;
             only d4[0], d5[0] and d6[0] are used)
        r3 = length (loop count; each iteration consumes and produces
             4 pixels = 16 bytes; must be non-zero)

            v = sat_u8((in0*d4[0] + in1*d5[0] + in2*d6[0]) >> 8)
            out[0] = out[1] = out[2] = v
*/
ENTRY(rsdIntrinsicColorMatrixDot_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave 4 pixels: lane n of d0..d3 holds channel 0..3 of pixel n */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        vmull.s16       q8, d24, d4[0]
        vmlal.s16       q8, d26, d5[0]
        vmlal.s16       q8, d28, d6[0]
        vshrn.i32       d24, q8, #8
        vqmovun.s16     d0, q12
        /* Replicate the single result into the other two colour channels */
        vmov.u8         d1, d0
        vmov.u8         d2, d0

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrixDot_K)


/*
static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
                  const float* gPtr, int iradius, int x1, int x2)

    r0 = out
    r1 = pin
    r2 = stride
    r3 = gptr
    r4 = sp, ct
    r5 = sp+4, x1
    r6 = sp+8, x2

    Vertical pass: for every pair of pixels starting at x1, accumulates
    ct rows (stepping by stride bytes) weighted by the ct floats at gptr
    and writes two float4 results (32 bytes) to out.

    Preconditions visible in the code: ct must be non-zero (inner loop
    is a do-while), and x1 is advanced by 2 and compared for equality
    with x2, so x2 - x1 must be a positive multiple of 2.
*/
ENTRY(rsdIntrinsicBlurVF_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        ldr             r4, [sp, #32+64]
        ldr             r5, [sp, #32+64 + 4]
        ldr             r6, [sp, #32+64 + 8]

1:
        veor            q10, q10, q10         /* float4 blurredPixel = 0;  (pixel x1)     */
        veor            q11, q11, q11         /* float4 blurredPixel = 0;  (pixel x1 + 1) */
        add             r7, r1, r5, lsl #2    /* const uchar *pi = ptrIn + x1 * 4; */
        mov             r10, r3               /* r10 = running weight pointer */

        mov             r11, r4               /* r11 = rows remaining */

2:
        vld1.32         {d2}, [r7]            /* two 4-byte pixels of this row */
        vmovl.u8        q1, d2                /* u8  -> u16 */
        vmovl.u16       q3, d2                /* u16 -> u32, first pixel  */
        vmovl.u16       q4, d3                /* u16 -> u32, second pixel */
        vcvt.f32.s32    q3, q3
        vcvt.f32.s32    q4, q4
        vld1.32         {d0[0]}, [r10]!       /* next weight */
        add             r7, r7, r2            /* next row */
        vmla.f32        q10, q3, d0[0]
        vmla.f32        q11, q4, d0[0]
        subs            r11, r11, #1
        bne             2b

        vst1.32         {q10}, [r0]!
        vst1.32         {q11}, [r0]!
        add             r5, r5, #2
        cmp             r5, r6
        bne             1b


        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurVF_K)

/*
    Horizontal pass over float4 input, producing one 4-byte pixel per
    outer iteration.

    NOTE(review): the prototype quoted in this position used to be the
    OneVF one from the vertical pass,
        static void OneVF(float4 *out, const uchar *ptrIn, int iStride,
                          const float* gPtr, int iradius, int x1, int x2)
    which does not match the register assignment below (no stride, and
    the code reads the input as 4 x f32 per element) - confirm the real
    C prototype against the caller.

    r0 = out
    r1 = pin
    r2 = gptr
    r3 = ct
    r4 = sp, x1
    r5 = sp+4, x2

    Preconditions visible in the code: the first tap is handled
    separately and the remaining ct - 1 taps are consumed two at a time
    in a do-while loop, so ct - 1 must be a positive even number (ct odd
    and >= 3).  x1 is advanced by 1 and compared for equality with x2.
*/
ENTRY(rsdIntrinsicBlurHF_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        ldr             r4, [sp, #32+64]
        ldr             r5, [sp, #32+64 + 4]

1:
        add             r7, r1, r4, lsl #4    /* pi = pin + x1 * 16 bytes (one 4 x f32 element per x) */
        mov             r10, r2               /* r10 = running weight pointer */
        mov             r11, r3               /* r11 = taps remaining */

        /* First tap initialises the accumulator */
        vld1.32         {q1}, [r7]!
        vld1.32         {d6[0]}, [r10]!
        vmul.f32        q0, q1, d6[0]
        sub             r11, r11, #1

2:
        /* Two taps per iteration */
        vld1.32         {q1}, [r7]!
        vld1.32         {q2}, [r7]!
        vld1.32         {d6[0]}, [r10]!
        vld1.32         {d6[1]}, [r10]!
        vmla.f32        q0, q1, d6[0]
        vmla.f32        q0, q2, d6[1]
        subs            r11, r11, #2
        bne             2b

        /* float -> s32, then narrow 32 -> 16 -> 8 (non-saturating) */
        vcvt.s32.f32    q0, q0
        vmovn.u32       d0, q0
        vmovn.u16       d0, q0

        vst1.32         {d0[0]}, [r0]!
        add             r4, r4, #1
        cmp             r4, r5
        bne             1b

        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurHF_K)

/*
    r0 = dst
    r1 = Y
    r2 = VU
    r3 = length (pixels / 8)
    r4 = sp, params

    This function converts 8 pixels per iteration
*/
ENTRY(rsdIntrinsicYuv_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        ldr             r4, [sp, #32+64]
        vld1.16         {q2}, [r4]!           // mults
        vld1.16         {q3}, [r4]!           // y offset
        vld1.16         {q4}, [r4]!           // 128
        vdup.8          d3, d5[1]

1:
        vld1.8          {d10}, [r1]!
        vld1.8          {d12}, [r2]!
        vmovl.u8        q5, d10               // Y at .16
        vmovl.u8        q6, d12               // vu at .16

        vsub.i16        q5, q5, q3
        vsub.i16        q6, q6, q4
        vtrn.16         d12, d13              // d12 = u, d13 = v
        vmov            q7, q6
        vtrn.16         d12, d14
        vtrn.32         d12, d14
        vtrn.16         d13, d15
        vtrn.32         d13, d15

        vmull.s16       q8, d10, d4[0]
        vmull.s16       q11, d11, d4[0]
        vmov            q9, q8
        vmov            q10, q8
        vmov            q12, q11
        vmov            q13, q11

        vmlal.s16       q8, d12, d4[1]
        vmlal.s16       q9, d12, d5[0]
        vmlal.s16       q10, d13, d4[3]
        vmlal.s16       q9, d13, d4[2]
429915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 430915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vmlal.s16 q11, d14, d4[1] 431915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vmlal.s16 q12, d14, d5[0] 432915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vmlal.s16 q13, d15, d4[3] 433915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vmlal.s16 q12, d15, d4[2] 434915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 435915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 436915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d16, q8, #8 437915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d18, q9, #8 438915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d20, q10, #8 439915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d0, q8 440915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d1, q9 441915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d2, q10 442915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 443915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 444915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 445915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 446915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 447915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d16, q11, #8 448915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d18, q12, #8 449915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vshrn.i32 d20, q13, #8 450915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d0, q8 451915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d1, q9 452915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vqmovun.s16 d2, q10 453915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 454915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 
455915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 456915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 457915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 458915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams subs r3, r3, #1 459915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams bne 1b 460915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 461915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams vpop {q4-q7} 462915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams pop {r4-r8, r10, r11, lr} 463915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams bx lr 464915aa964a1a312f5e06c115112a3aea14fd31b33Jason SamsEND(rsdIntrinsicYuv_K) 465915aa964a1a312f5e06c115112a3aea14fd31b33Jason Sams 466a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* Convolve 5x5 */ 467a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 468a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* 469a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r0 = dst 470a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r1 = y0 base pointer 471a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r2 = y1 base pointer 472a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r3 = y2 base pointer 473a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r4 = y3 base pointer 474a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r5 = y4 base pointer 475a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r6 = coeffs 476a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams r7 = length 477a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams*/ 478a1b08e2cacf3891fcd6895422c6124887b75975eJason SamsENTRY(rsdIntrinsicConvolve5x5_K) 479a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams push {r4-r7, lr} 480a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vpush {q4-q7} 481a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 482a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* load y3 in r4 */ 483a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams ldr r4, [sp, #20 + 
64] 484a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 485a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* load y4 in r5 */ 486a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams ldr r5, [sp, #24 + 64] 487a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 488a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Load the coefficients pointer */ 489a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams ldr r6, [sp, #28 + 64] 490a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 491a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Create the coefficients vector */ 492a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.16 {d0, d1, d2, d3}, [r6]! 493a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.16 {d4, d5, d6}, [r6] 494a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 495a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* load the count */ 496a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams ldr r6, [sp, #32 + 64] 497a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 498a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Load the frequently used immediate in a register */ 499a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams mov r7, #8 500a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 501a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams1: 502a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 503a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.8 {d24, d25, d26}, [r1], r7 @ y0 ( y - 2 ) 504a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.8 {d27, d28, d29}, [r2], r7 @ y0 ( y - 1 ) 505a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 506a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Signal memory for data that will be used in the loop after the next */ 507a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams PLD (r1, r7) 508a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams PLD (r2, r7) 509a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 
510a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Promoting the 8bit channels to 16bit */ 511a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q9, d24 512a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q10, d25 513a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q11, d26 514a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q12, d27 515a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q13, d28 516a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q14, d29 517a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 518a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* 519a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d18, d19, d20, d21, d22, d23, 520a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d24, d25 521a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams*/ 522a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmull.s16 q4, d18, d0[0] 523a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d19, d0[1] 524a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d20, d0[2] 525a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d21, d0[3] 526a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d22, d1[0] 527a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 528a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d24, d1[1] 529a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d25, d1[2] 530a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d26, d1[3] 531a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d27, d2[0] 532a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d28, d2[1] 533a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 534a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmull.s16 q5, d19, d0[0] 535a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d20, d0[1] 536a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d21, d0[2] 
537a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d22, d0[3] 538a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d23, d1[0] 539a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 540a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d25, d1[1] 541a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d26, d1[2] 542a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d27, d1[3] 543a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d28, d2[0] 544a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d29, d2[1] 545a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 546a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 547a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Next 2 rows */ 548a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 549a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.8 {d24, d25, d26}, [r3], r7 @ y0 ( y ) 550a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.8 {d27, d28, d29}, [r4], r7 @ y0 ( y + 1 ) 551a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 552a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Signal memory for data that will be used in the loop after the next */ 553a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams PLD (r3, r7) 554a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams PLD (r4, r7) 555a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 556a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Promoting the 8bit channels to 16bit */ 557a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q9, d24 558a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q10, d25 559a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q11, d26 560a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q12, d27 561a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q13, d28 562a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 
vmovl.u8 q14, d29 563a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 564a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* 565a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d18, d19, d20, d21, d22, d23, 566a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d24, d25 567a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams*/ 568a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d18, d2[2] 569a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d19, d2[3] 570a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d20, d3[0] 571a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d21, d3[1] 572a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d22, d3[2] 573a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 574a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d24, d3[3] 575a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d25, d4[0] 576a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d26, d4[1] 577a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d27, d4[2] 578a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d28, d4[3] 579a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 580a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d19, d2[2] 581a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d20, d2[3] 582a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d21, d3[0] 583a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d22, d3[1] 584a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d23, d3[2] 585a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 586a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d25, d3[3] 587a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d26, d4[0] 588a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d27, d4[1] 589a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d28, d4[2] 
590a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d29, d4[3] 591a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 592a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Last row */ 593a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Load the y base pointers in Qregs and post-increase the address by r7=#8 */ 594a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vld1.8 {d24, d25, d26}, [r5], r7 @ y0 ( y + 2 ) 595a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 596a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Signal memory for data that will be used in the loop after the next */ 597a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams PLD (r5, r7) 598a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 599a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Promoting the 8bit channels to 16bit */ 600a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q9, d24 601a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q10, d25 602a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmovl.u8 q11, d26 603a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 604a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* 605a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d18, d19, d20, d21, d22, d23, 606a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams d24, d25 607a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams*/ 608a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 609a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d18, d5[0] 610a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d19, d5[1] 611a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d20, d5[2] 612a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d21, d5[3] 613a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q4, d22, d6[0] 614a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 615a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d19, d5[0] 616a1b08e2cacf3891fcd6895422c6124887b75975eJason 
Sams vmlal.s16 q5, d20, d5[1] 617a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d21, d5[2] 618a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d22, d5[3] 619a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vmlal.s16 q5, d23, d6[0] 620a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 621a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 622a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 623a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 624a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* Narrow it to a d-reg 32 -> 16 bit */ 625a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vshrn.i32 d8, q4, #8 626a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vshrn.i32 d9, q5, #8 627a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 628a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */ 629a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vqmovun.s16 d8, q4 630a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 631a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vst1.8 d8, [r0]! @ return the output and increase the address of r0 632a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 633a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Are we done? 
*/ 634a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams subs r6, r6, #1 635a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams bne 1b 636a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 637a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams /* Yup, bye */ 638a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams vpop {q4-q7} 639a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams pop {r4-r7, lr} 640a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams bx lr 641a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 642a1b08e2cacf3891fcd6895422c6124887b75975eJason SamsEND(rsdIntrinsicConvolve5x5_K) 643fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 644fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 645fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 646fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 647fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams/* 648fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams dst = src + dst * (1.0 - src.a) 649fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 650fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r0 = dst 651fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r1 = src 652fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r2 = length 653fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams*/ 654fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsENTRY(rsdIntrinsicBlendSrcOver_K) 655fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams .save {r4, lr} 656fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams stmfd sp!, {r4, lr} 657fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpush {q4-q7} 658fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 659fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, #255 660fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vdup.16 q7, r4 661fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 662fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, r0 663fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams1: 664fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 
665fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* src */ 666fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 667fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 668fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 669fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 670fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 671fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 672fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 673fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 674fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q12, d0, #8 675fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q13, d1, #8 676fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q14, d2, #8 677fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q6, d3 678fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q6, q7, q6 // q6 = 1 - src.a 679fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q15, d3, #8 680fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 681fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* dst */ 682fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 683fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 684fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 685fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 686fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 687fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 
688fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 689fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 690fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q8, d0 691fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q9, d1 692fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q10, d2 693fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q11, d3 694fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 695fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q12, q8, q6 696fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q13, q9, q6 697fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q14, q10, q6 698fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q15, q11, q6 699fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 700fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d0, q12, #8 701fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d1, q13, #8 702fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d2, q14, #8 703fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d3, q15, #8 704fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 705fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 706fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 707fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 708fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 709fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 710fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 711fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
712fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 713fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams subs r2, r2, #1 714fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bne 1b 715fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 716fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpop {q4-q7} 717fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams ldmfd sp!, {r4, lr} 718fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bx lr 719fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsEND(rsdIntrinsicBlendSrcOver_K) 720fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 721fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams/* 722fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams dst = dst + src * (1.0 - dst.a) 723fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 724fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r0 = dst 725fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r1 = src 726fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r2 = length 727fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams*/ 728fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsENTRY(rsdIntrinsicBlendDstOver_K) 729fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams .save {r4, lr} 730fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams stmfd sp!, {r4, lr} 731fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpush {q4-q7} 732fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 733fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, #255 734fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vdup.16 q7, r4 735fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 736fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, r0 737fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams1: 738fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 739fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* src */ 740fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 741fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 
742fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 743fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 744fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 745fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 746fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 747fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 748fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q12, d0 749fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q13, d1 750fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q14, d2 751fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q15, d3 752fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 753fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* dst */ 754fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 755fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 756fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 757fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 758fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 759fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 760fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 761fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
762fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q8, d0, #8 763fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q9, d1, #8 764fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q10, d2, #8 765fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q6, d3 766fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q6, q7, q6 // q6 = 1 - dst.a 767fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshll.u8 q11, d3, #8 768fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 769fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 770fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q8, q12, q6 771fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q9, q13, q6 772fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q10, q14, q6 773fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmla.i16 q11, q15, q6 774fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 775fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d0, q8, #8 776fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d1, q9, #8 777fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d2, q10, #8 778fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d3, q11, #8 779fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 780fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 781fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 782fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 783fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 784fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 785fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 786fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
// Loop until the iteration count in r2 is exhausted.
        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstOver_K)

/*
        dst = src * dst.a

        Pixels are 4 x 8-bit channels; the fourth byte is treated as alpha.
        Products are formed in 16-bit lanes and scaled with >> 8 (i.e. /256,
        not /255), so 255 * 255 yields 254.

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendSrcIn_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        // No q4-q7 (d8-d15) register is touched here, so nothing NEON
        // needs saving.

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src: 8 pixels de-interleaved, one channel per d register */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst: only the alpha channel (d3) is needed */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q11, d3

        vmul.i16    q12, q12, q11
        vmul.i16    q13, q13, q11
        vmul.i16    q14, q14, q11
        vmul.i16    q15, q15, q11

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcIn_K)

/*
        dst = dst * src.a

        Pixels are 4 x 8-bit channels; the fourth byte is treated as alpha.
        Products are scaled with >> 8 (i.e. /256, not /255).

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendDstIn_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        // No q4-q7 (d8-d15) register is touched here, so nothing NEON
        // needs saving.

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src: only the alpha channel (d3) is needed */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q15, d3

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3

        vmul.i16    q8, q8, q15
        vmul.i16    q9, q9, q15
        vmul.i16    q10, q10, q15
        vmul.i16    q11, q11, q15

        vshrn.i16   d0, q8, #8
        vshrn.i16   d1, q9, #8
        vshrn.i16   d2, q10, #8
        vshrn.i16   d3, q11, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstIn_K)



/*
        dst = src * (1.0 - dst.a)

        "1.0" is represented as 255; products are scaled with >> 8.

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendSrcOut_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}         // q6/q7 are used below

        mov         r4, #255
        vdup.16     q7, r4              // q7 = 255 in every 16-bit lane

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst: only the alpha channel (d3) is needed */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q11, d3


        vsub.i16    q6, q7, q11        // q6 = 1 - dst.a
        vmul.i16    q12, q12, q6
        vmul.i16    q13, q13, q6
        vmul.i16    q14, q14, q6
        vmul.i16    q15, q15, q6

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcOut_K)


/*
        dst = dst * (1.0 - src.a)

        "1.0" is represented as 255; products are scaled with >> 8.

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendDstOut_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}         // q6/q7 are used below

        mov         r4, #255
        vdup.16     q7, r4              // q7 = 255 in every 16-bit lane

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src: only the alpha channel (d3) is needed */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q15, d3

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3


        vsub.i16    q6, q7, q15        // q6 = 1 - src.a
        vmul.i16    q12, q8, q6
        vmul.i16    q13, q9, q6
        vmul.i16    q14, q10, q6
        vmul.i16    q15, q11, q6       // src.a in q15 is dead once q6 exists

        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        vshrn.i16   d3, q15, #8
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstOut_K)


/*
        dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
        dst.a = dst.a

        "1.0" is represented as 255; the sum is scaled with >> 8.
        NOTE(review): the two products are accumulated in 16-bit lanes, so the
        sum can wrap unless colour channels never exceed their own alpha
        (premultiplied input) -- confirm against the caller's contract.

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendSrcAtop_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}         // q6/q7 are used below

        mov         r4, #255
        vdup.16     q7, r4              // q7 = 255 in every 16-bit lane

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3


        vsub.i16    q6, q7, q15        // q6 = 1 - src.a
        vmul.i16    q8, q8, q6
        vmul.i16    q9, q9, q6
        vmul.i16    q10, q10, q6

        vmla.i16    q8, q12, q11
        vmla.i16    q9, q13, q11
        vmla.i16    q10, q14, q11


        vshrn.i16   d0, q8, #8
        vshrn.i16   d1, q9, #8
        vshrn.i16   d2, q10, #8
        // d3 is deliberately left alone: it still holds the dst alpha bytes
        // from the dst load, which is exactly the documented result alpha.
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendSrcAtop_K)

/*
        dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
        dst.a = src.a

        "1.0" is represented as 255; the sum is scaled with >> 8.
        NOTE(review): the two products are accumulated in 16-bit lanes, so the
        sum can wrap unless colour channels never exceed their own alpha
        (premultiplied input) -- confirm against the caller's contract.

        r0 = dst
        r1 = src
        r2 = number of 8-pixel groups (one group per loop pass).  Must be
             non-zero: the count is only tested after a group is processed.
*/
ENTRY(rsdIntrinsicBlendDstAtop_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}         // q6/q7 are used below

        mov         r4, #255
        vdup.16     q7, r4              // q7 = 255 in every 16-bit lane

        mov         r4, r0              // r4 = store cursor, r0 = dst read cursor
1:

        /* src */
        vld4.8      {d0, d1, d2, d3}, [r1]!
        vmovl.u8    q12, d0
        vmovl.u8    q13, d1
        vmovl.u8    q14, d2
        vmovl.u8    q15, d3

        /* dst */
        vld4.8      {d0, d1, d2, d3}, [r0]!
        vmovl.u8    q8, d0
        vmovl.u8    q9, d1
        vmovl.u8    q10, d2
        vmovl.u8    q11, d3


        vsub.i16    q6, q7, q11        // q6 = 1 - dst.a
        vmul.i16    q12, q12, q6
        vmul.i16    q13, q13, q6
        vmul.i16    q14, q14, q6

        vmla.i16    q12, q8, q15
        vmla.i16    q13, q9, q15
        vmla.i16    q14, q10, q15


        vshrn.i16   d0, q12, #8
        vshrn.i16   d1, q13, #8
        vshrn.i16   d2, q14, #8
        // d3 still holds dst alpha from the dst load; overwrite it with the
        // (unscaled) src alpha so the stored pixel has dst.a = src.a.
        vmovn.i16   d3, q15
        vst4.8      {d0, d1, d2, d3}, [r4]!

        subs        r2, r2, #1
        bne         1b

        vpop        {q4-q7}
        ldmfd       sp!, {r4, lr}
        bx          lr
END(rsdIntrinsicBlendDstAtop_K)

/*
        dst = dst ^ src

        r0 = dst
        r1 = src
        r2 = length
*/
ENTRY(rsdIntrinsicBlendXor_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        mov         r4, #255
        vdup.16     q7, r4

        mov         r4, r0
1:

        /* src */
        vld4.8      {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8      {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8      {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8      {d0[3],d1[3],d2[3],d3[3]}, [r1]!
        vld4.8      {d0[4],d1[4],d2[4],d3[4]}, [r1]!
        vld4.8      {d0[5],d1[5],d2[5],d3[5]}, [r1]!
        vld4.8      {d0[6],d1[6],d2[6],d3[6]}, [r1]!
        vld4.8      {d0[7],d1[7],d2[7],d3[7]}, [r1]!
        vmov.u8     d4, d0
        vmov.u8     d5, d1
        vmov.u8     d6, d2
        vmov.u8     d7, d3

        /* dst */
        vld4.8      {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vld4.8      {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vld4.8      {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vld4.8      {d0[3],d1[3],d2[3],d3[3]}, [r0]!
        vld4.8      {d0[4],d1[4],d2[4],d3[4]}, [r0]!
        vld4.8      {d0[5],d1[5],d2[5],d3[5]}, [r0]!
        vld4.8      {d0[6],d1[6],d2[6],d3[6]}, [r0]!
        vld4.8      {d0[7],d1[7],d2[7],d3[7]}, [r0]!

        veor        d0, d0, d4
        veor        d1, d1, d5
        veor        d2, d2, d6
        veor        d3, d3, d7

        vst4.8      {d0[0],d1[0],d2[0],d3[0]}, [r4]!
        vst4.8      {d0[1],d1[1],d2[1],d3[1]}, [r4]!
        vst4.8      {d0[2],d1[2],d2[2],d3[2]}, [r4]!
        vst4.8      {d0[3],d1[3],d2[3],d3[3]}, [r4]!
        vst4.8      {d0[4],d1[4],d2[4],d3[4]}, [r4]!
        vst4.8      {d0[5],d1[5],d2[5],d3[5]}, [r4]!
        vst4.8      {d0[6],d1[6],d2[6],d3[6]}, [r4]!
        vst4.8      {d0[7],d1[7],d2[7],d3[7]}, [r4]!
1299fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1300fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams subs r2, r2, #1 1301fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bne 1b 1302fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1303fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpop {q4-q7} 1304fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams ldmfd sp!, {r4, lr} 1305fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bx lr 1306fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsEND(rsdIntrinsicBlendXor_K) 1307fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1308fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams/* 1309fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams dst = dst * src 1310fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1311fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r0 = dst 1312fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r1 = src 1313fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r2 = length 1314fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams*/ 1315fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsENTRY(rsdIntrinsicBlendMultiply_K) 1316fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams .save {r4, lr} 1317fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams stmfd sp!, {r4, lr} 1318fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpush {q4-q7} 1319fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1320fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, #255 1321fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vdup.16 q7, r4 1322fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1323fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, r0 1324fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams1: 1325fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1326fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* src */ 1327fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1328fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1329fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1330fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1331fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1332fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1333fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1334fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1335fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q12, d0 1336fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q13, d1 1337fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q14, d2 1338fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q15, d3 1339fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1340fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* dst */ 1341fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1342fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1343fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1344fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1345fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1346fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1347fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1348fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1349fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q8, d0 1350fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q9, d1 1351fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q10, d2 1352fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q11, d3 1353fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1354fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1355fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmul.i16 q8, q8, q12 1356fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmul.i16 q9, q9, q13 1357fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmul.i16 q10, q10, q14 1358fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmul.i16 q11, q11, q15 1359fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1360fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d0, q8, #8 1361fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d1, q9, #8 1362fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d2, q10, #8 1363fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vshrn.i16 d3, q11, #8 1364fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1365fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1366fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1367fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1368fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1369fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1370fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1371fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1372fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1373fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams subs r2, r2, #1 1374fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bne 1b 1375fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1376fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpop {q4-q7} 1377fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams ldmfd sp!, {r4, lr} 1378fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bx lr 1379fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsEND(rsdIntrinsicBlendMultiply_K) 1380fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1381fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams/* 1382fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams dst = min(src + dst, 1.0) 1383fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1384fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r0 = dst 1385fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r1 = src 1386fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r2 = length 1387fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams*/ 1388fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsENTRY(rsdIntrinsicBlendAdd_K) 1389fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams .save {r4, lr} 1390fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams stmfd sp!, {r4, lr} 1391fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpush {q4-q7} 1392fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1393fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, #255 1394fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vdup.16 q7, r4 1395fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1396fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, r0 1397fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams1: 1398fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1399fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* src */ 1400fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1401fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1402fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1403fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1404fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1405fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1406fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1407fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1408fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q12, d0 1409fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q13, d1 1410fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q14, d2 1411fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q15, d3 1412fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1413fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* dst */ 1414fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1415fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1416fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1417fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1418fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1419fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1420fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1421fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1422fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q8, d0 1423fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q9, d1 1424fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q10, d2 1425fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q11, d3 1426fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1427fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1428fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vadd.i16 q8, q8, q12 1429fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vadd.i16 q9, q9, q13 1430fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vadd.i16 q10, q10, q14 1431fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vadd.i16 q11, q11, q15 1432fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1433fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d0, q8 1434fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d1, q9 1435fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d2, q10 1436fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d3, q11 1437fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1438fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1439fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1440fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1441fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1442fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1443fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1444fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1445fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1446fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams subs r2, r2, #1 1447fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bne 1b 1448fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1449fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpop {q4-q7} 1450fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams ldmfd sp!, {r4, lr} 1451fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bx lr 1452fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsEND(rsdIntrinsicBlendAdd_K) 1453fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1454fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1455fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams/* 1456fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams dst = max(dst - src, 0.0) 1457fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1458fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r0 = dst 1459fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r1 = src 1460fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams r2 = length 1461fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams*/ 1462fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsENTRY(rsdIntrinsicBlendSub_K) 1463fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams .save {r4, lr} 1464fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams stmfd sp!, {r4, lr} 1465fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpush {q4-q7} 1466fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1467fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, #255 1468fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vdup.16 q7, r4 1469fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1470fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams mov r4, r0 1471fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams1: 1472fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1473fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* src */ 1474fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]! 
1475fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]! 1476fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]! 1477fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]! 1478fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]! 1479fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]! 1480fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]! 1481fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]! 1482fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q12, d0 1483fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q13, d1 1484fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q14, d2 1485fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q15, d3 1486fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1487fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams /* dst */ 1488fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]! 1489fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]! 1490fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]! 1491fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]! 1492fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]! 1493fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]! 1494fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]! 1495fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]! 
1496fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q8, d0 1497fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q9, d1 1498fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q10, d2 1499fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vmovl.u8 q11, d3 1500fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1501fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1502fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q8, q8, q12 1503fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q9, q9, q13 1504fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q10, q10, q14 1505fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vsub.i16 q11, q11, q15 1506fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1507fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d0, q8 1508fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d1, q9 1509fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d2, q10 1510fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vqmovun.s16 d3, q11 1511fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]! 1512fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]! 1513fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]! 1514fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]! 1515fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]! 1516fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]! 1517fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]! 1518fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]! 
1519fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1520fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams subs r2, r2, #1 1521fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bne 1b 1522fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1523fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams vpop {q4-q7} 1524fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams ldmfd sp!, {r4, lr} 1525fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams bx lr 1526fa17cda2d7e0948677035890e40498ad0b639c92Jason SamsEND(rsdIntrinsicBlendSub_K) 1527fa17cda2d7e0948677035890e40498ad0b639c92Jason Sams 1528