;//
;//
;// File Name:  armVCM4P10_UnpackBlock4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Syntax: ARM assembler (armasm) syntax, ARM state, UAL-style condition suffixes.
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS ARM1136JS


        IF ARM1136JS

;//----------------------------------------------------------------------------
;// armVCM4P10_UnpackBlock4x4
;//
;// Clears a 32-byte destination buffer (16 halfword slots) and then scatters
;// the values found in a packed byte stream into it.
;//
;// Stream layout, as decoded by the loop below. Each entry is:
;//   1 flag byte:
;//       bits 0-3 : index (0..15) of the destination halfword
;//       bit  4   : set   -> a 16-bit value follows, low byte first
;//                  clear -> a single signed 8-bit value follows
;//       bit  5   : set   -> this is the last entry of the block
;//   followed by the 1- or 2-byte value.
;//
;// In:    r0 = ppSrc, address of the stream pointer (read on entry and
;//             written back, advanced past the consumed bytes, on exit)
;//        r1 = pDst,  destination buffer, 32 bytes are written
;// Out:   *ppSrc updated; pDst[] filled
;// Uses:  r2-r7 as scratch (see the RN aliases below)
;//
;// NOTE(review): presumably the C prototype is
;//   void armVCM4P10_UnpackBlock4x4(const OMX_U8 **ppSrc, OMX_S16 *pDst);
;//   confirm against the OpenMAX DL headers.
;// NOTE(review): the stream is trusted; there is no length check on *ppSrc.
;//----------------------------------------------------------------------------

;//--------------------------------------
;// Input Arguments and their scope/usage
;//--------------------------------------
ppSrc           RN 0    ;// Persistent variable
pDst            RN 1    ;// Persistent variable

;//--------------------------------
;// Variables and their scope/usage
;//--------------------------------
pSrc            RN 2    ;// Persistent variable: current read position in the stream
Flag            RN 3    ;// Flag byte of the entry being decoded
Value           RN 4    ;// Zero for the clear phase, then the decoded value (even half of the STRD pair)
Value2          RN 5    ;// Zero for the clear phase, then the high byte of a 16-bit value
strOffset       RN 6    ;// Byte offset into pDst for the current value
cstOffset       RN 7    ;// Loop-invariant mask used to form strOffset

;//--------------------------------
;// Flag-byte layout constants
;//--------------------------------
UNPACK_FLAG_16BIT   EQU 0x10    ;// Flag bit 4: value is stored as 16 bits
UNPACK_FLAG_LAST    EQU 0x20    ;// Flag bit 5: last entry of the block
UNPACK_OFFSET_MASK  EQU 31      ;// Keeps bits 1-4 of (Flag << 1), i.e. (Flag & 15) << 1


        ;// NOTE(review): the r7 argument presumably tells M_START the highest
        ;// register to preserve (r4-r7 are used here) -- see armCOMM_s.h.
        M_START armVCM4P10_UnpackBlock4x4, r7

        LDR     pSrc, [ppSrc]                       ;// Load pSrc
        MOV     cstOffset, #UNPACK_OFFSET_MASK      ;// To be used in the loop, to compute offset

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values of the <pDst> buffer with zero
        ;//-----------------------------------------------------------------------

        MOV     Value,  #0                          ;// Initialize the zero value
        MOV     Value2, #0                          ;// Initialize the zero value
        LDRB    Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>

        ;// Each STRD writes the Value/Value2 (r4/r5) pair: 8 bytes = 4 halfwords.
        ;// NOTE(review): STRD needs pDst suitably aligned (word or doubleword,
        ;// depending on core configuration) -- confirm callers guarantee this.
        STRD    Value, [pDst, #0]                   ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
        STRD    Value, [pDst, #8]                   ;// pDst[4]  = pDst[5]  = pDst[6]  = pDst[7]  = 0
        STRD    Value, [pDst, #16]                  ;// pDst[8]  = pDst[9]  = pDst[10] = pDst[11] = 0
        STRD    Value, [pDst, #24]                  ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0

        ;//----------------------------------------------------------------------------
        ;// The loop below parses and unpacks the input stream. The C-model has
        ;// a somewhat complicated logic for sign extension. But in the v6 version,
        ;// that can be easily taken care of by loading the data from the <pSrc>
        ;// stream as a SIGNED byte. So, based on the first TST instruction, 8 bits
        ;// or 16 bits are read (the 16-bit case as two byte loads).
        ;//
        ;// Next, to compute the offset where the unpacked value needs to be stored,
        ;// we modify the computation to perform [(Flag & 15) << 1] as
        ;// [(Flag << 1) & 31]. This results in a saving of one cycle.
        ;//
        ;// The flags set by each TST stay live across the following instructions:
        ;// none of the loads, AND, ORR or STRH below updates the condition flags.
        ;//----------------------------------------------------------------------------

unpackLoop
        TST     Flag,  #UNPACK_FLAG_16BIT           ;// Computing (Flag & 0x10): NE -> 16-bit value, EQ -> 8-bit value
        LDRSBNE Value2, [pSrc, #1]                  ;// 16-bit: high byte, sign-extended. Load byte wise to avoid unaligned access
        LDRBNE  Value, [pSrc], #2                   ;// 16-bit: low byte, zero-extended; pSrc += 2
        AND     strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1 (byte offset of the halfword)
        LDRSBEQ Value, [pSrc], #1                   ;// 8-bit: Value = sign-extended byte; pSrc += 1
        ORRNE   Value, Value, Value2, LSL #8        ;// 16-bit: Value = low | (high << 8)

        TST     Flag,  #UNPACK_FLAG_LAST            ;// Computing (Flag & 0x20) to check if we're done
        LDRBEQ  Flag,  [pSrc], #1                   ;// Not done: Flag = *pSrc++, for the next iteration
        STRH    Value, [pDst, strOffset]            ;// Store the low 16 bits of <Value> at offset <strOffset>
        BEQ     unpackLoop                          ;// Not done: branch to the loop beginning

        STR     pSrc, [ppSrc]                       ;// Update the bitstream pointer
        M_END

        ENDIF



        END