;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe

        M_VARIANTS ARM1136JS





    IF ARM1136JS

        ;// Stack locals (allocated through the armCOMM_s.h macros)
        M_ALLOC8 ppDstArgs, 8                               ;// saved {pDst, dstStep} pair
        M_ALLOC4 ppSrc, 4                                   ;// pSrc at the top of the current column strip
        M_ALLOC4 ppDst, 4                                   ;// pDst at the top of the current column strip
        M_ALLOC4 pCounter, 4                                ;// row counter spilled during the horizontal pass

        ;// Function header
        ;// Function:
        ;//     armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        ;//
        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
        ;// be aligned.
        ;//
        ;// The work is done in two passes:
        ;//   1. Vertical pass: the 6-tap filter (1, -5, 20, 20, -5, 1) is applied down the
        ;//      columns of the source. 4 rows x 12 columns of 16-bit results are written to
        ;//      the intermediate buffer with a row pitch of 24 bytes. Every 16-bit result
        ;//      carries a bias of 16*255 so that it is never negative.
        ;//   2. Horizontal pass: the same 6-tap filter is applied along each intermediate
        ;//      row with the dual 16-bit multiply instructions. The accumulated bias is
        ;//      removed, 512 is left in for rounding, the value is saturated and shifted
        ;//      right by 10, and four result bytes per row are stored to pDst.
        ;//
        ;// Registers used as input for this function
        ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer
        ;// On entry r0 addresses the row loaded as "C" below; rows C-2 .. C+6 are read.
        ;//
        ;// Registers preserved for top level function
        ;// r0,r1,r2,r3,r4,r5,r6,r14
        ;//
        ;// Registers modified by the function
        ;// r7,r8,r9,r10,r11,r12
        ;//
        ;// Output registers
        ;// None. Function will preserve r0-r3
        ;//
        ;// NOTE(review): as written, r2/r3 (pDst/dstStep) return to their entry values, but
        ;// r0 leaves pointing at the start of the intermediate buffer and r1 leaves as 24
        ;// (the intermediate row pitch). r4-r6 and r14 are written in the body; presumably
        ;// M_START/M_END (armCOMM_s.h, not visible here) save and restore them - confirm
        ;// against the macro definitions and the callers.

        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6

;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3

;// Declare vertical-pass registers.
;// Suffix 0 holds the even bytes [x2 x0] of a row word as two 16-bit lanes,
;// suffix 1 holds the odd bytes [x3 x1]. Two-letter names are lane-wise sums.
ValA            RN 5
ValA0           RN 4
ValA1           RN 5
ValAF0          RN 4
ValAF1          RN 5

ValB            RN 11

ValC            RN 5
ValC0           RN 4
ValC1           RN 5
ValCD0          RN 12
ValCD1          RN 14
ValCF0          RN 4
ValCF1          RN 5

ValD            RN 10

ValE            RN 7
ValE0           RN 6
ValE1           RN 7
ValEB0          RN 10
ValEB1          RN 11
ValED0          RN 6
ValED1          RN 7

ValF            RN 10

ValG            RN 14
ValG0           RN 12
ValG1           RN 14
ValGB0          RN 12
ValGB1          RN 14

Acc0            RN 4
Acc1            RN 5
Acc2            RN 6
Acc3            RN 7

Temp            RN 7
Step            RN 6                                        ;// not referenced by the code below

pInterBuf       RN 8                                        ;// shares r8 with Counter; consumed before Counter is set
Counter         RN 8
r0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
r0x0001fc00     RN 10                                       ;// 255*16*32 - 512: accumulated offset minus rounding term


;// Declare horizontal-pass registers (they reuse r8-r14 from the vertical pass)
ValCA           RN 8                                        ;// [c a] : high halfword c, low halfword a
ValDB           RN 9                                        ;// [d b]
ValGE           RN 10                                       ;// [g e]
ValHF           RN 11                                       ;// [h f]
r0x00140001     RN 12                                       ;// coefficient pair [20  1]
r0x0014fffb     RN 14                                       ;// coefficient pair [20 -5]

r0x00000200     RN 12                                       ;// not referenced by the code below
r0x000000ff     RN 12                                       ;// not referenced by the code below

        ;// Park the caller's destination arguments and redirect the vertical pass
        ;// into the intermediate buffer (row pitch 24 bytes = 12 x 16-bit samples).
        M_STRD      pDst, dstStep, ppDstArgs
        MOV         pDst, pInterBuf
        MOV         dstStep, #24

        ;// Set up counter of format, [0]  [0]  [1 (height)]  [8 (width)]
        MOV         Counter, #1
        MOV         Temp, #8
        ADD         Counter, Temp, Counter, LSL #8          ;// [0 0 H W]

        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results
WidthLoop
        ;// Remember the top of this 4-column strip so the next strip can start from it
        M_STR       pSrc, ppSrc
        M_STR       pDst, ppDst
HeightLoop
TwoRowsLoop
        ;// Each pass through this loop produces two output rows of the strip.
        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]
        SUB         pSrc, pSrc, srcStep, LSL #2             ;// pSrc now addresses row b (one row above c)
        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]
        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]
        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]
        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)

        LDR         ValD, [pSrc, srcStep, LSL #1]           ;// Reload [d3 d2 d1 d0] (r10 was reused for ValEB0)
        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
        RSB         ValCD1, ValEB1, ValCD1, LSL #2          ;// 4*(Off+C+D) - (Off+B+E), odd lanes

        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
        M_LDR       ValB, [pSrc], srcStep                   ;// Load  [b3 b2 b1 b0]; pSrc moves back to row c
        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
        ADD         ValCD1, ValCD1, ValCD1, LSL #2          ;// same for the odd lanes
        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
        RSB         ValED1, ValCF1, ValED1, LSL #2          ;// 4*(Off+E+D) - (Off+C+F), odd lanes

        SUB         ValA, pSrc, srcStep, LSL #1             ;// address of row a (two rows above c)
        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)
        ADD         ValED1, ValED1, ValED1, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)], odd lanes
        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
        ADD         Acc1, ValCD1, ValAF1                    ;// Acc1 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E), odd lanes

        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
        STR         Acc1, [pDst, #4]                        ;// Store odd-column pair at offset 4
        M_STR       Acc0, [pDst], dstStep                   ;// Store even-column pair & advance to next row
        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
        ADD         Acc3, ValED1, ValGB1                    ;// Acc3 = same for the odd lanes

        STR         Acc3, [pDst, #4]                        ;// Store odd-column pair at offset 4
        M_STR       Acc2, [pDst], dstStep                   ;// Store even-column pair & advance to next row

        SUBS        Counter, Counter, #1 << 8               ;// Height field starts at 1: two passes = 4 rows
        ADD         pSrc, pSrc, srcStep, LSL #1             ;// Move down two source rows (flags untouched)
        BPL         HeightLoop

        M_LDR       pSrc, ppSrc
        M_LDR       pDst, ppDst
        ADDS        Counter, Counter, #(1 << 8)-4           ;// Loop till width is 12
        ADD         pSrc, pSrc, #4                          ;// Next strip: 4 source bytes to the right
        ADD         pDst, pDst, #8                          ;// Next strip: 4 x 16-bit results to the right
        ADD         Counter, Counter, #1<<8                 ;// Restore the height field (flags untouched)
        BPL         WidthLoop

        ;//
        ;// Horizontal interpolation using multiplication
        ;//

        SUB         pSrc, pDst, #24                         ;// Back to the start of the intermediate buffer
        MOV         srcStep, #24                            ;// Intermediate row pitch in bytes
        M_LDRD      pDst, dstStep, ppDstArgs                ;// Recover the caller's pDst/dstStep

        MOV         Counter, #4                             ;// Four output rows
        LDR         r0x0014fffb, =0x0014fffb                ;// [20 -5]
        LDR         r0x00140001, =0x00140001                ;// [20  1]

HeightLoop1
        M_STR       Counter, pCounter                       ;// Counter shares r8 with ValCA, so spill it


        ;// The vertical pass stored each group of four samples as even pair then odd pair,
        ;// so consecutive samples a..h arrive interleaved as shown.
        LDR         ValCA, [pSrc], #4                       ;// Load  [0 c 0 a]
        LDR         ValDB, [pSrc], #4                       ;// Load  [0 d 0 b]
        LDR         ValGE, [pSrc], #4                       ;// Load  [0 g 0 e]
        LDR         ValHF, [pSrc], #4                       ;// Load  [0 h 0 f]

        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        SMUAD       Acc0, ValCA, r0x00140001                ;// Acc0  = a*1     + c*20
        SMUAD       Acc1, ValDB, r0x00140001                ;// Acc1  = b*1     + d*20
        SMUADX      Acc2, ValGE, r0x0014fffb                ;// Acc2  = e*20    + g*(-5)
        SMUAD       Acc3, ValGE, r0x0014fffb                ;// Acc3  = e*(-5)  + g*20

        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0          ;// Acc0 += b*(-5)  + d*20
        SMLADX      Acc1, ValGE, r0x00140001, Acc1          ;// Acc1 += e*20    + g*1
        SMLADX      Acc2, ValHF, r0x00140001, Acc2          ;// Acc2 += f*20    + h*1
        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3          ;// Acc3 += f*20    + h*(-5)

        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0          ;// Acc0 += e*(-5)
        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1          ;// Acc1 += c*(-5)
        SMLATB      Acc2, ValCA, r0x00140001, Acc2          ;// Acc2 += c*1
        SMLATB      Acc3, ValDB, r0x00140001, Acc3          ;// Acc3 += d*1

        LDRH        ValCA, [pSrc], #8                       ;// Load i; 8 = srcStep - 16, so pSrc lands on the next row
        SMLABB      Acc0, ValHF, r0x00140001, Acc0          ;// Acc0 += f*1
        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1          ;// Acc1 += f*(-5)
        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2          ;// Acc2 += d*(-5)
        SMLABB      Acc3, ValCA, r0x00140001, Acc3          ;// Acc3 += i*1

        ;// Each input carried an offset of 16*255 and the taps sum to 32, so the sums hold
        ;// 255*16*32. Subtracting 512 less than that leaves the rounding term in place.
        LDR         r0x0001fc00, =0x0001fc00                ;// (0xff * 16 * 32) - 512
        SUB         Acc0, Acc0, r0x0001fc00
        SUB         Acc1, Acc1, r0x0001fc00
        SUB         Acc2, Acc2, r0x0001fc00
        SUB         Acc3, Acc3, r0x0001fc00

        ;// Clamp to 18 unsigned bits so that the shift by 10 below yields 0..255
        USAT        Acc0, #18, Acc0
        USAT        Acc1, #18, Acc1
        USAT        Acc2, #18, Acc2
        USAT        Acc3, #18, Acc3

        MOV         Acc0, Acc0, LSR #10
        MOV         Acc1, Acc1, LSR #10
        MOV         Acc2, Acc2, LSR #10
        MOV         Acc3, Acc3, LSR #10

        ;// Pack the four bytes as [Acc3 Acc2 Acc1 Acc0] and store one output row
        M_LDR       Counter, pCounter
        ORR         Acc0, Acc0, Acc1, LSL #8
        ORR         Acc2, Acc2, Acc3, LSL #8
        SUBS        Counter, Counter, #1
        ORR         Acc0, Acc0, Acc2, LSL #16
        M_STR       Acc0, [pDst], dstStep
        BGT         HeightLoop1
End
        ;// Rewind both pointers by the four rows just processed
        SUB         pDst, pDst, dstStep, LSL #2             ;// pDst back to its entry value
        SUB         pSrc, pSrc, srcStep, LSL #2             ;// pSrc back to the start of the intermediate buffer

        M_END

    ENDIF

    END
