;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS



    IF  ARM1136JS

;// 0x01 in each of the four byte lanes of a 32-bit word
MASK_1  EQU 0x01010101

;// NOTE: the aliases below deliberately map several names onto the same
;// physical register (for example beta, bS and tC0 are all r14).  A name
;// is only valid between the instruction that defines it and the first
;// write through any other alias of the same register.

;// Declare input registers

pQ0        RN 0
StepArg    RN 1
tC0Arg     RN 2
alpha      RN 6

beta       RN 14
bS         RN 14
tC0        RN 14
ptC0       RN 1

;// Declare Local/Temporary variables

;// Pixels
p_0     RN 3
p_1     RN 5
p_2     RN 4
p_3     RN 2
q_0     RN 8
q_1     RN 9
q_2     RN 10
q_3     RN 12


;// Filtering

ap0q0   RN 1
filt    RN 2

m00     RN 7
m01     RN 11

apflg   RN 0
aqflg   RN 6

tC      RN 1


;// Declarations for bSLT4 kernel

pos     RN 7
neg     RN 12

P0a     RN 1
P1a     RN 8
Q0a     RN 7
Q1a     RN 4

u1      RN 3
max     RN 12
min     RN 2



;// Declarations for bSGE4 kernel

q_3b    RN 9
p_3b    RN 0
apqflg  RN 12

P0b     RN 6
P1b     RN 7
P2b     RN 1

Q0b     RN 9
Q1b     RN 0
Q2b     RN 2

;// Miscellaneous

a       RN 0
t0      RN 3
t1      RN 12
t2      RN 7
t3      RN 11
t4      RN 4
t5      RN 1
t8      RN 6
t9      RN 14
t10     RN 5
t11     RN 9

;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg, 6 - aqflg
;//        - 11 - m01, 7 - tC0
;//
;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
;//           (the final Q1 value is written through alias t3 = r11,
;//            not through Q1a = r4, which only holds the intermediate)
;//
;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (aliases t4 and Q1a) although
;// it is not in the list above - confirm callers do not rely on r4.
;//
;// All pixel/flag registers are processed as four independent byte lanes
;// using the ARMv6 parallel add/subtract instructions; USUB8/SSUB8/SADD8
;// set the per-lane GE flags that the following SEL consumes.

        M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr

        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3

        USUB8   t1, p_1, p_0            ;// t1 = p1-p0
        ;// t2 (r7) still holds the tC0 input here; multiplying by m01
        ;// presumably replicates it into every byte lane (assumes m01 is
        ;// 0x01010101 on entry and tC0 fits in one byte - confirm).
        MUL     tC0, t2, m01

        USUB8   t2, q_1, q_0            ;// t2 = q1-q0
        SSUB8   t1, t1, t2              ;// t1 = (p1-p0) - (q1-q0)

        USUB8   t2, p_0, q_0            ;// t2 = p0-q0 (low 8 bits per lane)
        AND     t2, t2, m01             ;// keep only bit 0 of each lane
        SHSUB8  t1, t1, t2
        UHSUB8  t5, p_0, q_0            ;// t5 = (p0-q0)>>1
        SSUB8   t1, t1, t2
        SHSUB8  t1, t1, t5
        MOV     m00, #0                 ;// m00 aliases r7 (t2), now free
        SADD8   t1, t1, m01
        SHSUB8  t1, t1, t5

        ;// tC = tC0
        ;// if (ap < beta) tC++;
        ;// if (aq < beta) tC++;
        USUB8   t5, filt, m01           ;// GE set in lanes where filt >= m01
        SEL     tC0, tC0, m00           ;// tC0 = 0 in lanes that are not filtered
        UQADD8  tC, tC0, apflg
        SSUB8   t1, t1, m00             ;// t1 - 0: only to set GE where t1 >= 0
        UQADD8  tC, tC, aqflg

        ;// Split into positive and negative part and clip
        SEL     pos, t1, m00            ;// pos = t1 where t1 >= 0, else 0
        USUB8   neg, pos, t1            ;// neg = pos - t1
        USUB8   t3, pos, tC             ;// GE where pos >= tC (t3 is scratch)
        SEL     pos, tC, pos            ;// pos = min(pos, tC)
        USUB8   t3, neg, tC             ;// GE where neg >= tC
        SEL     neg, tC, neg            ;// neg = min(neg, tC)

        ;// Reload m01: t3 and m01 are both r11, so the two USUB8
        ;// instructions above destroyed the mask.
        LDR     m01,=MASK_1

        UQADD8  P0a, p_0, pos
        UQSUB8  Q0a, q_0, pos
        UQSUB8  P0a, P0a, neg
        UQADD8  Q0a, Q0a, neg

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01           ;// GE where filt >= 1
        SEL     P0a, P0a, p_0
        SEL     Q0a, Q0a, q_0

        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
        ;// u1 = (p0 + q0 + 1)>>1
        ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
        MVN     p_0, p_0                ;// p_0' = ~p0
        UHSUB8  u1, q_0, p_0            ;// u1 aliases r3 (p_0): p_0 is dead now
        UQADD8  max, p_1, tC0           ;// upper clip bound for P1
        EOR     u1, u1, m01, LSL #7     ;// flip bit 7 of every lane

        ;// Calculate A = (p2+u1)>>1
        ;// Then delta = Clip3( -tC0, tC0, A - p1)

        ;// Clip P1
        UHADD8  P1a, p_2, u1            ;// P1a aliases r8 (q_0): q_0 is dead now
        UQSUB8  min, p_1, tC0           ;// lower clip bound; min aliases r2 (filt)
        USUB8   t4, P1a, max            ;// GE where P1a >= max
        SEL     P1a, max, P1a           ;// P1a = min(P1a, max)
        USUB8   t4, P1a, min            ;// GE where P1a >= min
        SEL     P1a, P1a, min           ;// P1a = max(P1a, min)

        ;// Clip Q1
        UHADD8  Q1a, q_2, u1
        UQADD8  max, q_1, tC0
        UQSUB8  min, q_1, tC0
        USUB8   t0, Q1a, max            ;// GE where Q1a >= max
        SEL     Q1a, max, Q1a           ;// Q1a = min(Q1a, max)
        USUB8   t0, Q1a, min            ;// GE where Q1a >= min
        SEL     Q1a, Q1a, min           ;// Q1a = max(Q1a, min)

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t0, apflg, m01          ;// GE where apflg >= 1
        SEL     P1a, P1a, p_1
        USUB8   t0, aqflg, m01          ;// GE where aqflg >= 1
        SEL     t3, Q1a, q_1            ;// final Q1 is returned in r11 (t3)

        M_END

;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg,aqflg
;//        - 1 - ap0q0, 6 - alpha
;//        - 7 - m00, 11 - m01
;//        - pQ_3, pP_3 - read with M_LDR from the slots declared with
;//          M_ARG below (q3 and p3 pixels)
;//
;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
;//
;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (alias t4) although it is not
;// in the list above - confirm callers do not rely on r4.

        M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr

        ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
        ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)

        M_ARG   pDummy,4
        M_ARG   pQ_3,4
        M_ARG   pP_3,4

        ;// Two halving adds with m00 give alpha>>2 per lane
        ;// (assumes m00 is all-zero on entry - confirm against caller).
        UHADD8  alpha, alpha, m00
        USUB8   t9, p_2, p_0                    ;//t9 = dp2p0
        UHADD8  alpha, alpha, m00
        ADD     alpha, alpha, m01, LSL #1       ;// + 2 in every lane
        USUB8   ap0q0, ap0q0, alpha             ;// GE where ap0q0 >= (alpha>>2)+2
        SEL     apqflg, m00, apflg              ;// clear the flags in those lanes

        ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
        ;//    = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)

        ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
        ;//    = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)

        ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
        ;//    = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)

        ;// Compute P0b
        USUB8   t2, p_0, q_0                    ;// t2 = p0-q0
        SSUB8   t5, t9, t2                      ;// t5 = (p2-p0) - (p0-q0)

        USUB8   t8, q_1, q_0                    ;// t8 = q1-q0
        SHADD8  t8, t5, t8

        USUB8   t9, p_1, p_0                    ;// t9 = p1-p0
        SADD8   t8, t8, t9
        SHSUB8  t8, t8, t2
        SHADD8  t5, t5, t9
        SHADD8  t8, t8, m01
        SHADD8  t9, t5, m01
        SADD8   P0b, p_0, t8
        ;// P0b ready

        ;// Compute P1b
        M_LDR   p_3b, pP_3                      ;// p_3b aliases r0 (apflg), already folded into apqflg
        SADD8   P1b, p_0, t9
        ;// P1b ready

        ;// Compute P2b
        USUB8   t9, p_2, p_0
        SADD8   t5, t5, t9
        UHSUB8  t9, p_3b, p_0                   ;// t9 = (p3-p0)>>1
        EOR     a, p_3b, p_0                    ;// a aliases r0 (p_3b): p_3b is dead after this
        AND     a, a, m01                       ;// bit 0 of (p3 ^ p0) per lane
        SHADD8  t5, t5, a
        UHADD8  a, p_0, q_1                     ;// a = (p0+q1)>>1, start of the fallback P0
        SADD8   t5, t5, m01
        SHADD8  t5, t5, t9
        MVN     t9, p_1                         ;// t9 = ~p1
        SADD8   P2b, p_0, t5
        ;// P2b ready

        ;// Fallback P0: a = ((a - ~p1)>>1) ^ 0x80, i.e. (a + p1 + 1)>>1
        UHSUB8  a, a, t9
        ;// GE is set only in lanes where apqflg == (apqflg | m01), i.e.
        ;// where bit 0 of apqflg is set (presumably the "ap" flag - confirm).
        ORR     t9, apqflg, m01
        USUB8   t9, apqflg, t9

        EOR     a, a, m01, LSL #7
        SEL     P0b, P0b, a
        SEL     P1b, P1b, p_1
        SEL     P2b, P2b, p_2

        USUB8   t4, filt, m01                   ;// GE where filt >= 1
        SEL     P0b, P0b, p_0                   ;// keep original p0 in unfiltered lanes


        ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
        ;//    = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)

        ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
        ;//    = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)

        ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
        ;//    = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)


        ;// Compute Q0b Q1b
        USUB8   t4, q_2, q_0                    ;// t4 = q2-q0
        USUB8   a, p_0, q_0                     ;// a  = p0-q0
        USUB8   t9, p_1, p_0                    ;// t9 = p1-p0
        SADD8   t0, t4, a                       ;// t0 aliases r3 (p_0): p_0 is dead now
        SHADD8  t9, t0, t9
        UHADD8  t10, q_0, p_1                   ;// t10 aliases r5 (p_1): start of the fallback Q0
        SADD8   t9, t9, a
        USUB8   a, q_1, q_0                     ;// a = q1-q0
        SHADD8  t9, t9, a
        SHADD8  t0, t0, a
        SHADD8  t9, t9, m01
        SHADD8  a, t0, m01
        SADD8   t9, q_0, t9
        ;// Q0b ready - t9

        ;// Halve apqflg per lane so that the test against m01 below looks
        ;// at what was bit 1 (presumably the "aq" flag - confirm).
        MOV     t4, #0
        UHADD8  apqflg, apqflg, t4

        SADD8   Q1b, q_0, a
        ;// Q1b ready

        USUB8   t4, apqflg, m01                 ;// GE where (apqflg>>1) >= 1
        SEL     Q1b, Q1b, q_1
        MVN     t11, q_1                        ;// t11 aliases r9 (q_1): q_1 is dead now
        UHSUB8  t10, t10, t11
        M_LDR   q_3b, pQ_3                      ;// q_3b also lives in r9
        EOR     t10, t10, m01, LSL #7           ;// fallback Q0 = (t10 + q1 + 1)>>1
        SEL     t9, t9, t10

        ;// Compute Q2b
        USUB8   t4, q_2, q_0
        SADD8   t4, t0, t4
        EOR     t0, q_3b, q_0
        AND     t0, t0, m01                     ;// bit 0 of (q3 ^ q0) per lane
        SHADD8  t4, t4, t0
        UHSUB8  t10, q_3b, q_0                  ;// t10 = (q3-q0)>>1
        SADD8   t4, t4, m01
        SHADD8  t4, t4, t10

        USUB8   t10, filt, m01                  ;// GE where filt >= 1
        SEL     Q0b, t9, q_0                    ;// keep original q0 in unfiltered lanes

        SADD8   t4, q_0, t4
        ;// Q2b ready - t4

        USUB8   t10, apqflg, m01                ;// GE where (apqflg>>1) >= 1
        SEL     Q2b, t4, q_2

        M_END

    ENDIF

        END