;//
;//
;// File Name:  armVCM4P10_UnpackBlock4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Processor variants this file is built for

        M_VARIANTS ARM1136JS


        IF ARM1136JS

;//----------------------------------------------------------------------------
;// armVCM4P10_UnpackBlock4x4
;//
;// Expands a packed coefficient stream into a 16-entry halfword block.
;//
;// In:   r0 (ppSrc) = address of the stream pointer
;//       r1 (pDst)  = destination block, 16 halfwords (32 bytes)
;// Out:  *ppSrc is advanced past every byte consumed
;//       pDst[0..15] hold the unpacked values; unlisted entries are zero
;//
;// Stream layout, as decoded by the loop below. Each entry is a prefix byte
;// followed by one or two data bytes:
;//       prefix bits 3:0  index (0..15) of the halfword to write in pDst
;//       prefix bit  4    set:   two data bytes, low byte first, forming a
;//                               signed 16-bit value
;//                        clear: one data byte, sign-extended
;//       prefix bit  5    set:   this is the last entry of the block
;//
;// Registers r4-r7 are used as scratch. The "r7" argument of M_START
;// presumably makes the macro preserve them (macro is defined in
;// armCOMM_s.h, not visible here - confirm there).
;// NOTE(review): STRD is used on pDst, so pDst is assumed to be aligned
;// suitably for doubleword stores - confirm against callers.
;//----------------------------------------------------------------------------

;//--------------------------------------
;// Input arguments
;//--------------------------------------
ppSrc       RN 0    ;// Address of the stream pointer (live for whole function)
pDst        RN 1    ;// Destination block           (live for whole function)

;//--------------------------------------
;// Working registers
;//--------------------------------------
pSrc        RN 2    ;// Current read position in the stream
prefix      RN 3    ;// Prefix byte of the entry being decoded
coefLo      RN 4    ;// Zero for the clear phase, then the unpacked value
coefHi      RN 5    ;// Zero for the clear phase, then the signed high byte
storeOfs    RN 6    ;// Byte offset into pDst for the current value
ofsMask     RN 7    ;// Constant 31, masks the doubled prefix


        M_START armVCM4P10_UnpackBlock4x4, r7

        LDR     pSrc, [ppSrc]               ;// pSrc = *ppSrc

        ;//--------------------------------------------------------------------
        ;// Clear the whole destination block. r4/r5 form the even/odd pair
        ;// written by each STRD, i.e. four halfwords per store.
        ;//--------------------------------------------------------------------

        MOV     coefLo, #0
        MOV     coefHi, #0
        LDRB    prefix, [pSrc], #1          ;// Fetch the first prefix byte early

        STRD    coefLo, [pDst, #0]          ;// pDst[0..3]   = 0
        STRD    coefLo, [pDst, #8]          ;// pDst[4..7]   = 0
        STRD    coefLo, [pDst, #16]         ;// pDst[8..11]  = 0
        STRD    coefLo, [pDst, #24]         ;// pDst[12..15] = 0

        MOV     ofsMask, #31                ;// Loop-invariant mask for storeOfs

        ;//--------------------------------------------------------------------
        ;// Decode loop.
        ;//
        ;// On entry to every iteration <prefix> holds the prefix byte of the
        ;// entry to decode and <pSrc> points at its first data byte.
        ;//
        ;// Sign extension comes for free from the signed byte loads: the
        ;// 8-bit form uses LDRSB directly, the 16-bit form combines an
        ;// unsigned low byte with a signed high byte. The data is read one
        ;// byte at a time so no unaligned halfword access is ever issued.
        ;//
        ;// The store offset (prefix & 15) << 1 is formed in one instruction
        ;// as (prefix << 1) & 31; bit 0 of the shifted value is always zero,
        ;// so both expressions give the same result.
        ;//--------------------------------------------------------------------

unpackNext
        AND     storeOfs, ofsMask, prefix, LSL #1   ;// storeOfs = (prefix & 15) << 1
        TST     prefix, #0x10                       ;// Z clear => 16-bit value follows

        LDRSBEQ coefLo, [pSrc], #1                  ;// 8-bit:  signed byte, pSrc += 1
        LDRBNE  coefLo, [pSrc], #1                  ;// 16-bit: unsigned low byte
        LDRSBNE coefHi, [pSrc], #1                  ;// 16-bit: signed high byte
        ORRNE   coefLo, coefLo, coefHi, LSL #8      ;// 16-bit: value = low | (high << 8)

        TST     prefix, #0x20                       ;// Z clear => this was the last entry
        LDRBEQ  prefix, [pSrc], #1                  ;// More to come: fetch next prefix byte
        STRH    coefLo, [pDst, storeOfs]            ;// Write low 16 bits of the value
        BEQ     unpackNext                          ;// Flags still from the TST above

        STR     pSrc, [ppSrc]                       ;// *ppSrc = updated stream position
        M_END

        ENDIF



        END