;//
;//
;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Syntax : ARM armasm (RN / EQU / IF..ENDIF directives, ";//" comments).
;// Target : selected by M_VARIANTS below (ARM1136JS); the kernels use the
;//          ARMv6 SIMD byte-lane instructions (USUB8/SEL/UHADD8/...), so
;//          every 32-bit register is treated as four independent 8-bit lanes.
;//
;// Two "unsafe" luma deblocking kernels are defined here. They do not take
;// arguments in the normal procedure-call registers; instead each one
;// documents its own register-level inputs/outputs in the header comment
;// that precedes it. Many RN aliases below share a physical register, so
;// the instruction order inside the kernels is significant.

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS



        IF ARM1136JS

;// 0x01 replicated in every byte lane. Used as a per-lane "+1" rounding
;// term, as the comparand for ">= 1" flag tests, and (shifted left by 7)
;// as a per-lane 0x80 mask.
MASK_1  EQU 0x01010101

;// Declare input registers

pQ0     RN 0
StepArg RN 1
tC0Arg  RN 2
alpha   RN 6

beta    RN 14
bS      RN 14
tC0     RN 14
ptC0    RN 1

;// Declare Local/Temporary variables

;// Pixels
p_0     RN 3
p_1     RN 5
p_2     RN 4
p_3     RN 2
q_0     RN 8
q_1     RN 9
q_2     RN 10
q_3     RN 12


;// Filtering

ap0q0   RN 1
filt    RN 2

m00     RN 7
m01     RN 11

apflg   RN 0
aqflg   RN 6

tC      RN 1


;//Declarations for bSLT4 kernel

pos     RN 7
neg     RN 12

P0a     RN 1
P1a     RN 8
Q0a     RN 7
Q1a     RN 4

u1      RN 3
max     RN 12
min     RN 2



;//Declarations for bSGE4 kernel

q_3b    RN 9
p_3b    RN 0
apqflg  RN 12

P0b     RN 6
P1b     RN 7
P2b     RN 1

Q0b     RN 9
Q1b     RN 0
Q2b     RN 2

;// Miscellaneous

a       RN 0
t0      RN 3
t1      RN 12
t2      RN 7
t3      RN 11
t4      RN 4
t5      RN 1
t8      RN 6
t9      RN 14
t10     RN 5
t11     RN 9

;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg, 6 - aqflg
;//        - 11 - m01, 7 - tC0
;//
;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
;//
;// Registers Corrupted - 0-3,5-12,14
;//
;// NOTE(review): r4 (t4 / Q1a) is also written by this routine, so the
;// "Registers Corrupted" list above looks incomplete - confirm with callers.
;//
;// Q1 is computed in Q1a (r4) but the final SEL deposits it in t3 (r11),
;// which is why the output list names r11 rather than r4. r11 is also m01,
;// so m01 does not survive the call.

        M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr

        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3

        USUB8   t1, p_1, p_0            ;// t1 = p1-p0 (per byte lane)
        MUL     tC0, t2, m01            ;// tC0 = r7 * 0x01010101; presumably r7 holds
                                        ;// one byte-sized tC0 that is broadcast to
                                        ;// all four lanes - confirm with caller

        USUB8   t2, q_1, q_0            ;// t2 = q1-q0 (r7 input already consumed)
        SSUB8   t1, t1, t2              ;// t1 = (p1-p0) - (q1-q0)

        USUB8   t2, p_0, q_0
        AND     t2, t2, m01             ;// t2 = low bit of (p0-q0) in each lane
        SHSUB8  t1, t1, t2
        UHSUB8  t5, p_0, q_0            ;// t5 = (p0-q0)>>1
        SSUB8   t1, t1, t2
        SHSUB8  t1, t1, t5
        MOV     m00, #0                 ;// r7 is free again: make it the zero mask
        SADD8   t1, t1, m01             ;// per-lane +1 rounding term
        SHSUB8  t1, t1, t5              ;// t1 = A (signed, per lane)

        ;// tC = tC0
        ;// if (ap < beta) tC++;
        ;// if (aq < beta) tC++;
        USUB8   t5, filt, m01           ;// GE[lane] set where filt >= 1
        SEL     tC0, tC0, m00           ;// tC0 = 0 in lanes that are not filtered
        UQADD8  tC, tC0, apflg
        SSUB8   t1, t1, m00             ;// t1 unchanged; GE[lane] set where t1 >= 0
        UQADD8  tC, tC, aqflg

        ;// Split into positive and negative part and clip
        SEL     pos, t1, m00            ;// pos = (t1 >= 0) ? t1 : 0
        USUB8   neg, pos, t1            ;// neg = (t1 >= 0) ? 0 : -t1
        USUB8   t3, pos, tC             ;// result unused; only GE flags matter
        SEL     pos, tC, pos            ;// pos = min(pos, tC)
        USUB8   t3, neg, tC
        SEL     neg, tC, neg            ;// neg = min(neg, tC)

        ;//Reload m01
        ;// (t3 shares r11 with m01 and was overwritten just above)
        LDR     m01, =MASK_1

        UQADD8  P0a, p_0, pos
        UQSUB8  Q0a, q_0, pos
        UQSUB8  P0a, P0a, neg           ;// P0a = sat(p0 + pos - neg)
        UQADD8  Q0a, Q0a, neg           ;// Q0a = sat(q0 - pos + neg)

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01           ;// GE[lane] set where filt >= 1
        SEL     P0a, P0a, p_0
        SEL     Q0a, Q0a, q_0

        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
        ;// u1 = (p0 + q0 + 1)>>1
        ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
        MVN     p_0, p_0                ;// p_0' = ~p0 (original p0 is gone from here on)
        UHSUB8  u1, q_0, p_0
        UQADD8  max, p_1, tC0           ;// upper clip bound for P1
        EOR     u1, u1, m01, LSL #7     ;// flip bit 7 of every lane (0x80808080)

        ;// Calculate A = (p2+u1)>>1
        ;// Then delta = Clip3( -tC0, tC0, A - p1)

        ;// Clip P1
        UHADD8  P1a, p_2, u1            ;// P1a shares r8 with q_0: q0 is gone after this
        UQSUB8  min, p_1, tC0           ;// lower clip bound; min shares r2 with filt
        USUB8   t4, P1a, max            ;// t4 shares r4 with p_2 (already consumed)
        SEL     P1a, max, P1a           ;// P1a = min(P1a, max)
        USUB8   t4, P1a, min
        SEL     P1a, P1a, min           ;// P1a = max(P1a, min)

        ;// Clip Q1
        UHADD8  Q1a, q_2, u1
        UQADD8  max, q_1, tC0
        UQSUB8  min, q_1, tC0
        USUB8   t0, Q1a, max
        SEL     Q1a, max, Q1a           ;// Q1a = min(Q1a, max)
        USUB8   t0, Q1a, min
        SEL     Q1a, Q1a, min           ;// Q1a = max(Q1a, min)

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t0, apflg, m01          ;// GE[lane] set where apflg >= 1
        SEL     P1a, P1a, p_1
        USUB8   t0, aqflg, m01          ;// GE[lane] set where aqflg >= 1
        SEL     t3, Q1a, q_1            ;// Q1 result is returned in r11 (t3)

        M_END

;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg,aqflg
;//        - 1 - ap0q0, 6 - alpha
;//        - 7 - m00, 11 - m01
;//
;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
;//
;// Registers Corrupted - 0-3,5-12,14
;//
;// NOTE(review): r4 (t4) is also written by this routine, so the
;// "Registers Corrupted" list above looks incomplete - confirm with callers.
;//
;// p3 and q3 are not passed in registers; they are loaded from the M_ARG
;// slots pP_3 and pQ_3 declared below.
;// r0 carries both flags in each lane: the p-side selects test bit 0 of
;// the lane, the q-side selects test whether the lane is still >= 1 after
;// it has been halved (see the code below).

        M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr

        ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
        ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)

        M_ARG   pDummy,4
        M_ARG   pQ_3,4
        M_ARG   pP_3,4

        UHADD8  alpha, alpha, m00       ;// alpha>>1 (m00 is an input; presumably
                                        ;// all-zero - confirm with caller)
        USUB8   t9, p_2, p_0            ;//t9 = dp2p0
        UHADD8  alpha, alpha, m00       ;// alpha>>2
        ADD     alpha, alpha, m01, LSL #1   ;// +2 per lane, assuming m01 = MASK_1
        USUB8   ap0q0, ap0q0, alpha     ;// GE[lane] set where ap0q0 >= (alpha>>2)+2
        SEL     apqflg, m00, apflg      ;// those lanes take m00, others keep the flags

        ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
        ;//    = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)

        ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
        ;//    = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)

        ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
        ;//    = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)

        ;// Compute P0b
        USUB8   t2, p_0, q_0            ;// t2 = p0-q0 (overwrites m00, same register)
        SSUB8   t5, t9, t2              ;// t5 = (p2-p0) - (p0-q0)

        USUB8   t8, q_1, q_0            ;// t8 = q1-q0 (alpha no longer needed)
        SHADD8  t8, t5, t8              ;// t8 = (t5 + (q1-q0))>>1

        USUB8   t9, p_1, p_0            ;// t9 = p1-p0
        SADD8   t8, t8, t9
        SHSUB8  t8, t8, t2              ;// t8 = (t8 - (p0-q0))>>1
        SHADD8  t5, t5, t9              ;// t5 = (t5 + (p1-p0))>>1
        SHADD8  t8, t8, m01             ;// (t8 + 1)>>1 : final rounding step for P0
        SHADD8  t9, t5, m01             ;// (t5 + 1)>>1 : final rounding step for P1
        SADD8   P0b, p_0, t8
        ;// P0b ready

        ;// Compute P1b
        M_LDR   p_3b, pP_3              ;// p_3b shares r0 with apflg; the flags were
                                        ;// already copied into apqflg above
        SADD8   P1b, p_0, t9
        ;// P1b ready

        ;// Compute P2b
        USUB8   t9, p_2, p_0
        SADD8   t5, t5, t9
        UHSUB8  t9, p_3b, p_0           ;// t9 = (p3-p0)>>1
        EOR     a, p_3b, p_0
        AND     a, a, m01               ;// a = low bit of (p3 ^ p0) in each lane
        SHADD8  t5, t5, a
        UHADD8  a, p_0, q_1             ;// a = (p0+q1)>>1, start of the fallback P0
        SADD8   t5, t5, m01
        SHADD8  t5, t5, t9
        MVN     t9, p_1                 ;// ~p1, for the rounding-average trick below
        SADD8   P2b, p_0, t5
        ;// P2b ready

        UHSUB8  a, a, t9
        ORR     t9, apqflg, m01
        USUB8   t9, apqflg, t9          ;// GE[lane] set where apqflg == (apqflg | 1),
                                        ;// i.e. where bit 0 of the lane is set

        EOR     a, a, m01, LSL #7       ;// a = (a + p1 + 1)>>1 (same ~x / ^0x80 trick
                                        ;// as u1 in the bSLT4 kernel)
        SEL     P0b, P0b, a             ;// flagged lanes keep P0b, others take a
        SEL     P1b, P1b, p_1           ;// unflagged lanes keep the original p1
        SEL     P2b, P2b, p_2           ;// unflagged lanes keep the original p2

        USUB8   t4, filt, m01           ;// GE[lane] set where filt >= 1
        SEL     P0b, P0b, p_0           ;// unfiltered lanes keep the original p0


        ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
        ;//    = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)

        ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
        ;//    = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)

        ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
        ;//    = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)


        ;// Compute Q0b Q1b
        USUB8   t4, q_2, q_0            ;// t4 = q2-q0
        USUB8   a, p_0, q_0             ;// a  = p0-q0
        USUB8   t9, p_1, p_0            ;// t9 = p1-p0 (last read of p_0)
        SADD8   t0, t4, a               ;// t0 shares r3 with p_0: p0 is gone after this
        SHADD8  t9, t0, t9
        UHADD8  t10, q_0, p_1           ;// t10 = (q0+p1)>>1, start of the fallback Q0;
                                        ;// t10 shares r5 with p_1
        SADD8   t9, t9, a
        USUB8   a, q_1, q_0             ;// a = q1-q0
        SHADD8  t9, t9, a
        SHADD8  t0, t0, a
        SHADD8  t9, t9, m01             ;// (t9 + 1)>>1 : final rounding step for Q0
        SHADD8  a, t0, m01              ;// (t0 + 1)>>1 : final rounding step for Q1
        SADD8   t9, q_0, t9
        ;// Q0b ready - t9

        MOV     t4, #0
        UHADD8  apqflg, apqflg, t4      ;// halve each flag lane (drops bit 0)

        SADD8   Q1b, q_0, a
        ;// Q1b ready

        USUB8   t4, apqflg, m01         ;// GE[lane] set where halved apqflg >= 1
        SEL     Q1b, Q1b, q_1           ;// unflagged lanes keep the original q1
        MVN     t11, q_1                ;// t11 shares r9 with q_1: r9 now holds ~q1
        UHSUB8  t10, t10, t11
        M_LDR   q_3b, pQ_3              ;// q_3b also shares r9; ~q1 was consumed above
        EOR     t10, t10, m01, LSL #7   ;// t10 = (t10 + q1 + 1)>>1
        SEL     t9, t9, t10             ;// uses the GE flags set by USUB8 t4 above

        ;// Compute Q2b
        USUB8   t4, q_2, q_0
        SADD8   t4, t0, t4
        EOR     t0, q_3b, q_0
        AND     t0, t0, m01             ;// t0 = low bit of (q3 ^ q0) in each lane
        SHADD8  t4, t4, t0
        UHSUB8  t10, q_3b, q_0          ;// t10 = (q3-q0)>>1
        SADD8   t4, t4, m01
        SHADD8  t4, t4, t10

        USUB8   t10, filt, m01          ;// GE[lane] set where filt >= 1
        SEL     Q0b, t9, q_0            ;// Q0b shares r9 with q_3b (already consumed)

        SADD8   t4, q_0, t4
        ;// Q2b ready - t4

        USUB8   t10, apqflg, m01        ;// GE[lane] set where halved apqflg >= 1
        SEL     Q2b, t4, q_2            ;// Q2b shares r2 with filt, so it is written last

        M_END

        ENDIF

        END