///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
//*******************************************************************************
//* @file
//*  ihevc_intra_pred_luma_mode_18_34_neon.s
//*  (NOTE(review): this name looks inherited from the luma variant, while the
//*  routine defined in this file is the chroma "_av8" one - confirm.)
//*
//* @brief
//*  contains the function definition for chroma intra prediction, modes 18 and 34.
//*  the routine in this file is hand-written aarch64 (armv8-a) advanced simd
//*  assembly, written for a gnu-syntax assembler.
//*
//* @author
//*  yogeswaran rs
//*
//* @par list of functions:
//*  - ihevc_intra_pred_chroma_mode_18_34_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************
//*/
///**
//*******************************************************************************
//*
//* @brief
//*  chroma intra prediction for modes 18 and 34
//*
//* @par description:
//*  every destination row is a plain copy of consecutive reference bytes; no
//*  arithmetic is performed on the samples.
//*
//*    mode == 34 : row 0 is read from pu1_ref + 4*nt + 4 and the read address
//*                 advances by 2 bytes for each following row
//*    otherwise  : (treated as mode 18) row 0 is read from pu1_ref + 4*nt and
//*                 the read address retreats by 2 bytes for each following row
//*
//*    nt == 4    : 4 rows of 8 bytes are written
//*    otherwise  : nt rows are written, 8 rows per loop iteration, 16 bytes per
//*                 row; when nt == 16 a second pass writes bytes 16..31 of the
//*                 same rows. nt must therefore be a multiple of 8 here.
//*
//*  the 2-byte step presumably corresponds to one interleaved chroma sample
//*  pair - TODO confirm against the c reference implementation.
//*
//* @param[in] pu1_ref
//*  uword8 pointer to the reference samples
//*
//* @param[in] src_strd
//*  integer source stride (not used by this routine)
//*
//* @param[out] pu1_dst
//*  uword8 pointer to the destination
//*
//* @param[in] dst_strd
//*  integer destination stride, in bytes
//*
//* @param[in] nt
//*  size of transform block
//*
//* @param[in] mode
//*  prediction mode; 34 (0x22) selects the forward walk, any other value the
//*  backward (mode 18) walk
//*
//* @returns
//*  none
//*
//* @remarks
//*  dst_strd, nt and mode are 32-bit c arguments. aapcs64 leaves bits 63:32 of
//*  such argument registers unspecified, so they are widened (or compared as
//*  w registers) before any 64-bit use.
//*
//*******************************************************************************
//*/

//void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
//                                        word32 src_strd,
//                                        uword8 *pu1_dst,
//                                        word32 dst_strd,
//                                        word32 nt,
//                                        word32 mode)
//
//**************variables vs registers*****************************************
//x0  => *pu1_ref           (becomes the row-0 read address)
//w1  => src_strd           (unused)
//x2  => *pu1_dst
//w3  => dst_strd           (sign-extended into x3 on entry)
//w4  => nt                 (sign-extended into x4 on entry)
//w5  => mode
//
//x6  => per-row step of the read pointer, nt != 4 path (+2 or -2)
//x8  => read pointer (nt != 4 path) / per-row step (nt == 4 path)
//x10 => write pointer, nt != 4 path
//x11 => nt, later nt - 16; decides whether a second 16-byte column pass runs
//x12 => rows still to be copied in the current pass
//x20 => scratch (callee-saved, hence the stp/ldp pair)
//v0-v7, v16-v23 => row data in flight (all caller-saved under aapcs64)

.text
.align 4
.include "ihevc_neon_macros.s"



.globl ihevc_intra_pred_chroma_mode_18_34_av8

.type ihevc_intra_pred_chroma_mode_18_34_av8, %function

ihevc_intra_pred_chroma_mode_18_34_av8:

    stp         x19, x20, [sp, #-16]!       // x20 is clobbered as scratch below

    // 32-bit arguments: the upper halves of x3/x4 are unspecified on entry
    sxtw        x3, w3                      // dst_strd, used as st1 post-index
    sxtw        x4, w4                      // nt, used in 64-bit address math

    cmp         x4, #4
    beq         mode2_4

    mov         x12, x4                     // rows left in the first pass
    mov         x11, x4                     // kept for the nt == 16 test
    add         x0, x0, x4, lsl #2          // x0 = pu1_ref + 4*nt

    cmp         w5, #0x22                   // mode 34 ?  (flags stay live below)
    mov         x10, x2                     // write pointer = pu1_dst

    add         x20, x0, #4
    csel        x0, x20, x0, eq             // mode 34 starts 4 bytes further on

    mov         x6, #2
    mov         x20, #-2
    csel        x6, x6, x20, eq             // read step: +2 (mode 34) / -2
    mov         x8, x0                      // read pointer for column pass 0


kernel:
    // copy 8 rows of 16 bytes; the read pointer moves by x6 and the write
    // pointer by dst_strd after every row
    ld1         {v0.8b, v1.8b}, [x8], x6
    st1         {v0.8b, v1.8b}, [x10], x3
    ld1         {v2.8b, v3.8b}, [x8], x6
    st1         {v2.8b, v3.8b}, [x10], x3
    ld1         {v4.8b, v5.8b}, [x8], x6
    st1         {v4.8b, v5.8b}, [x10], x3
    ld1         {v6.8b, v7.8b}, [x8], x6
    st1         {v6.8b, v7.8b}, [x10], x3
    ld1         {v16.8b, v17.8b}, [x8], x6
    st1         {v16.8b, v17.8b}, [x10], x3
    ld1         {v18.8b, v19.8b}, [x8], x6
    st1         {v18.8b, v19.8b}, [x10], x3
    ld1         {v20.8b, v21.8b}, [x8], x6
    st1         {v20.8b, v21.8b}, [x10], x3
    ld1         {v22.8b, v23.8b}, [x8], x6
    st1         {v22.8b, v23.8b}, [x10], x3

    subs        x12, x12, #8
    bne         kernel

    // nt == 16: run once more for bytes 16..31 of each row. none of the
    // instructions between cmp and beq alter the flags. x11 becomes 0 after
    // the subtraction, so the second pass ends at end_func.
    cmp         x11, #16
    add         x8, x0, #16                 // read pointer, second column pass
    add         x10, x2, #16                // write pointer, second column pass
    sub         x11, x11, #16
    mov         x12, #16                    // 16 rows again
    beq         kernel
    b           end_func

mode2_4:
    // nt == 4, so 4*nt + 4 == 20
    add         x0, x0, #20
    cmp         w5, #0x22                   // mode 34 ?
    sub         x20, x0, #4
    csel        x0, x20, x0, ne             // any other mode starts at pu1_ref + 16

    mov         x8, #2
    mov         x20, #-2
    csel        x8, x8, x20, eq             // read step: +2 (mode 34) / -2

    // 4 rows of 8 bytes
    ld1         {v0.8b}, [x0], x8
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x8
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x8
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x8
    st1         {v0.2s}, [x2], x3

end_func:
    ldp         x19, x20, [sp], #16

    ret