@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_itrans_recon_4x4_ttype1.s
@ *
@ * @brief
@ *  contains function definitions for inverse transform and reconstruction
@ *
@ *
@ * @author
@ *  naveen sr
@ *
@ * @par list of functions:
@ *  - ihevc_itrans_recon_4x4_ttype1()
@ *
@ * @remarks
@ *  none
@ *
@ *******************************************************************************
@ */

@/* all the functions here are replicated from ihevc_itrans.c and modified to */
@/* include reconstruction */
@
@/**
@ *******************************************************************************
@ *
@ * @brief
@ *  this function performs inverse transform type 1 (dst) and reconstruction
@ *  for 4x4 input block
@ *
@ * @par description:
@ *  performs inverse transform, adds the prediction data and clips the output
@ *  to 8 bit
@ *
@ * @param[in] pi2_src
@ *  input 4x4 coefficients
@ *
@ * @param[in] pi2_tmp
@ *  temporary 4x4 buffer for storing inverse transform 1st stage output
@ *
@ * @param[in] pu1_pred
@ *  prediction 4x4 block
@ *
@ * @param[out] pu1_dst
@ *  output 4x4 block
@ *
@ * @param[in] src_strd
@ *  input stride
@ *
@ * @param[in] pred_strd
@ *  prediction stride
@ *
@ * @param[in] dst_strd
@ *  output stride
@ *
@ * @param[in] zero_cols
@ *  zero columns in pi2_src
@ *
@ * @returns  void
@ *
@ * @remarks
@ *  none
@ *
@ *******************************************************************************
@ */
@void ihevc_itrans_recon_4x4_ttype1(word16 *pi2_src,
@                                   word16 *pi2_tmp,
@                                   uword8 *pu1_pred,
@                                   uword8 *pu1_dst,
@                                   word32 src_strd,
@                                   word32 pred_strd,
@                                   word32 dst_strd,
@                                   word32 zero_cols)

@**************variables vs registers*************************
@   r0 => *pi2_src
@   r1 => *pi2_tmp      (not referenced by this implementation)
@   r2 => *pu1_pred
@   r3 => *pu1_dst
@   r4 => src_strd      (loaded from the stack, then doubled to a byte stride)
@   r5 => pred_strd     (loaded from the stack)
@   r6 => dst_strd      (loaded from the stack)
@   r7 => zero_cols     (loaded from the stack, not referenced afterwards)
@
@ NEON usage:
@   d4            => transform constants {29, 55, 74, 84}
@   d0-d3         => input rows, later the final residual rows
@   q3-q6         => 32-bit accumulators (q4-q6 are d8-d13)
@   d14-d17       => first stage output
@   d18,d19       => four prediction rows, 4 bytes each
@
@ d8-d15 are written by this routine, so they are saved on entry and
@ restored on exit.

.text
.align 4




.set shift_stage1_idct ,   7
.set shift_stage2_idct ,   12

.globl ihevc_itrans_recon_4x4_ttype1_a9q

.type ihevc_itrans_recon_4x4_ttype1_a9q, %function

ihevc_itrans_recon_4x4_ttype1_a9q:

    stmfd       sp!, {r4-r12, r14}          @save core registers and the return address (10 words = 40 bytes)
    ldr         r4,[sp,#40]                 @loading src_strd  (first stack argument, just above the 40 saved bytes)
    ldr         r5,[sp,#44]                 @loading pred_strd
    ldr         r6,[sp,#48]                 @loading dst_strd
    ldr         r7,[sp,#52]                 @loading zero_cols
    vpush       {d8-d15}                    @save the NEON registers clobbered below; done after the
                                            @argument loads so the #40..#52 offsets above stay valid

    add         r4,r4,r4                    @ src_strd in terms of word16 (bytes = 2 * src_strd)

    mov         r8,#29
    mov         r9,#55
    mov         r10,#74
    mov         r11,#84
    vmov.i16    d4[0],r8                    @ d4[0] = 29
    vld1.16     d0,[r0],r4                  @loading pi2_src 1st row
    vmov.i16    d4[1],r9                    @ d4[1] = 55
    vld1.16     d1,[r0],r4                  @loading pi2_src 2nd row
    vmov.i16    d4[2],r10                   @ d4[2] = 74
    vld1.16     d2,[r0],r4                  @loading pi2_src 3rd row
    vmov.i16    d4[3],r11                   @ d4[3] = 84
    vld1.16     d3,[r0],r4                  @loading pi2_src 4th row

    @ first stage computation starts
    @ each lane handles one column; pi2_src[n] below is the element of row n
    vmull.s16   q3,d1,d4[2]                 @74 * pi2_src[1]
    vmlal.s16   q3,d0,d4[0]                 @74 * pi2_src[1] + 29 * pi2_src[0]
    vmlal.s16   q3,d3,d4[1]                 @74 * pi2_src[1] + 29 * pi2_src[0] + 55 * pi2_src[3]
    vmlal.s16   q3,d2,d4[3]                 @pi2_out[0] = 29 * pi2_src[0] + 74 * pi2_src[1] + 84 * pi2_src[2] + 55 * pi2_src[3]

    vmull.s16   q4,d1,d4[2]                 @74 * pi2_src[1]
    vmlal.s16   q4,d0,d4[1]                 @74 * pi2_src[1] + 55 * pi2_src[0]
    vmlsl.s16   q4,d2,d4[0]                 @74 * pi2_src[1] + 55 * pi2_src[0] - 29 * pi2_src[2]
    vmlsl.s16   q4,d3,d4[3]                 @pi2_out[1] = 55 * pi2_src[0] + 74 * pi2_src[1] - 29 * pi2_src[2] - 84 * pi2_src[3]

    vmull.s16   q5,d0,d4[2]                 @ 74 * pi2_src[0]
    vmlsl.s16   q5,d2,d4[2]                 @ 74 * pi2_src[0] - 74 * pi2_src[2]
    vmlal.s16   q5,d3,d4[2]                 @pi2_out[2] = 74 * pi2_src[0] - 74 * pi2_src[2] + 74 * pi2_src[3]

    vmull.s16   q6,d2,d4[1]                 @ 55 * pi2_src[2]
    vmlsl.s16   q6,d1,d4[2]                 @ 55 * pi2_src[2] - 74 * pi2_src[1]
    vmlsl.s16   q6,d3,d4[0]                 @ - 74 * pi2_src[1] + 55 * pi2_src[2] - 29 * pi2_src[3]
    vmlal.s16   q6,d0,d4[3]                 @pi2_out[3] = 84 * pi2_src[0] - 74 * pi2_src[1] + 55 * pi2_src[2] - 29 * pi2_src[3]

    vqrshrn.s32 d14,q3,#shift_stage1_idct   @ (pi2_out[0] + rounding ) >> shift_stage1_idct
    vqrshrn.s32 d15,q4,#shift_stage1_idct   @ (pi2_out[1] + rounding ) >> shift_stage1_idct
    vqrshrn.s32 d16,q5,#shift_stage1_idct   @ (pi2_out[2] + rounding ) >> shift_stage1_idct
    vqrshrn.s32 d17,q6,#shift_stage1_idct   @ (pi2_out[3] + rounding ) >> shift_stage1_idct
    vld1.32     d18[0], [r2],r5             @ pred row 0 (interleaved with the arithmetic)

    @ transpose the 4x4 block of 16-bit values held in d14-d17
    vtrn.16     d14,d15
    vtrn.16     d16,d17
    vtrn.32     d14,d16
    vtrn.32     d15,d17
    @ output in d14,d15,d16,d17
    @ first stage computation ends

    @ second stage computation starts  :  copy pasting 1st stage
    @ register changes
    @ d14 - d0
    @ d15 - d1
    @ d16 - d2
    @ d17 - d3
    vld1.32     d18[1], [r2],r5             @ pred row 1
    vmull.s16   q3,d15,d4[2]                @74 * pi2_src[1]
    vmlal.s16   q3,d14,d4[0]                @74 * pi2_src[1] + 29 * pi2_src[0]
    vmlal.s16   q3,d17,d4[1]                @74 * pi2_src[1] + 29 * pi2_src[0] + 55 * pi2_src[3]
    vmlal.s16   q3,d16,d4[3]                @pi2_out[0] = 29 * pi2_src[0] + 74 * pi2_src[1] + 84 * pi2_src[2] + 55 * pi2_src[3]

    vmull.s16   q4,d15,d4[2]                @74 * pi2_src[1]
    vmlal.s16   q4,d14,d4[1]                @74 * pi2_src[1] + 55 * pi2_src[0]
    vmlsl.s16   q4,d16,d4[0]                @74 * pi2_src[1] + 55 * pi2_src[0] - 29 * pi2_src[2]
    vmlsl.s16   q4,d17,d4[3]                @pi2_out[1] = 55 * pi2_src[0] + 74 * pi2_src[1] - 29 * pi2_src[2] - 84 * pi2_src[3]

    vmull.s16   q5,d14,d4[2]                @ 74 * pi2_src[0]
    vmlsl.s16   q5,d16,d4[2]                @ 74 * pi2_src[0] - 74 * pi2_src[2]
    vmlal.s16   q5,d17,d4[2]                @pi2_out[2] = 74 * pi2_src[0] - 74 * pi2_src[2] + 74 * pi2_src[3]
    vld1.32     d19[0], [r2],r5             @ pred row 2

    vmull.s16   q6,d16,d4[1]                @ 55 * pi2_src[2]
    vmlsl.s16   q6,d15,d4[2]                @ - 74 * pi2_src[1] + 55 * pi2_src[2]
    vmlsl.s16   q6,d17,d4[0]                @ - 74 * pi2_src[1] + 55 * pi2_src[2] - 29 * pi2_src[3]
    vmlal.s16   q6,d14,d4[3]                @pi2_out[3] = 84 * pi2_src[0] - 74 * pi2_src[1] + 55 * pi2_src[2] - 29 * pi2_src[3]

    vqrshrn.s32 d0,q3,#shift_stage2_idct    @ (pi2_out[0] + rounding ) >> shift_stage2_idct
    vqrshrn.s32 d1,q4,#shift_stage2_idct    @ (pi2_out[1] + rounding ) >> shift_stage2_idct
    vqrshrn.s32 d2,q5,#shift_stage2_idct    @ (pi2_out[2] + rounding ) >> shift_stage2_idct
    vqrshrn.s32 d3,q6,#shift_stage2_idct    @ (pi2_out[3] + rounding ) >> shift_stage2_idct
    vld1.32     d19[1], [r2],r5             @ pred row 3

    @ transpose the 4x4 block of 16-bit values held in d0-d3
    vtrn.16     d0,d1
    vtrn.16     d2,d3
    vtrn.32     d0,d2
    vtrn.32     d1,d3
    @ output in d0,d1,d2,d3
    @ second stage computation ends

    @ pred was loaded into d18 (rows 0,1) and d19 (rows 2,3) above

    vaddw.u8    q0,q0,d18                   @ pi2_out(16bit) + pu1_pred(8bit), rows 0 and 1
    vqmovun.s16 d0,q0                       @ clip_u8(pi2_out(16bit) + pu1_pred(8bit))
    vaddw.u8    q1,q1,d19                   @ pi2_out(16bit) + pu1_pred(8bit), rows 2 and 3
    vqmovun.s16 d1,q1                       @ clip_u8(pi2_out(16bit) + pu1_pred(8bit))

    @ storing destination, 4 bytes per row
    vst1.32     {d0[0]},[r3],r6
    vst1.32     {d0[1]},[r3],r6
    vst1.32     {d1[0]},[r3],r6
    vst1.32     {d1[1]},[r3],r6

    vpop        {d8-d15}                    @restore the NEON registers saved on entry
    ldmfd       sp!,{r4-r12,r15}            @restore core registers and return (saved lr is popped into pc)