;// armVCM4P10_UnpackBlock4x4_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_UnpackBlock4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS ARM1136JS


        IF ARM1136JS

;//--------------------------------------
;// Input Arguments and their scope/usage
;//--------------------------------------
ppSrc           RN 0    ;// Persistent variable: address of the stream pointer
pDst            RN 1    ;// Persistent variable: base of the 16-halfword output

;//--------------------------------
;// Variables and their scope/usage
;//--------------------------------
pSrc            RN 2    ;// Persistent variable: running stream pointer
Flag            RN 3    ;// Current flag byte read from the stream
Value           RN 4    ;// Unpacked coefficient; also low half of the zero pair
Value2          RN 5    ;// High byte of a 16-bit coefficient; high half of the zero pair
strOffset       RN 6    ;// Byte offset into <pDst> for the current coefficient
cstOffset       RN 7    ;// Constant mask used to derive <strOffset>

;//----------------------------------------------------------
;// Layout of the flag byte, as interpreted by the loop below
;//----------------------------------------------------------
UNPACK_WIDE_FLAG    EQU 0x10    ;// Set: a 16-bit value follows; clear: an 8-bit value
UNPACK_LAST_FLAG    EQU 0x20    ;// Set: this is the last entry of the block
UNPACK_OFFSET_MASK  EQU 31      ;// Keeps bits [4:1] of (Flag << 1), i.e. (Flag & 15) << 1

;//----------------------------------------------------------------------------
;// armVCM4P10_UnpackBlock4x4
;//
;// Clears the 32-byte block at <pDst> and then scatters into it the
;// coefficients read from the byte stream addressed through <ppSrc>.
;//
;// Each stream entry is a flag byte followed by a value:
;//   Flag[3:0]  index of the destination halfword, pDst[Flag & 15]
;//   Flag[4]    1: value is two bytes, low byte first, high byte signed
;//              0: value is a single signed byte
;//   Flag[5]    1: last entry, the loop terminates after storing it
;//
;// In:    r0 = ppSrc, r1 = pDst
;// Out:   *ppSrc is updated to point just past the last byte consumed.
;//        No result register is written by this routine.
;// Uses:  r2-r7 and the condition flags.
;//
;// NOTE(review): M_START/M_END come from armCOMM_s.h; the <r7> argument
;// presumably requests preservation of registers up to r7 - confirm there.
;// NOTE(review): the STRD/STRH stores presumably rely on <pDst> being
;// suitably aligned - confirm against the caller's contract.
;//----------------------------------------------------------------------------

        M_START armVCM4P10_UnpackBlock4x4, r7

        LDR     pSrc, [ppSrc]                       ;// Load pSrc
        MOV     cstOffset, #UNPACK_OFFSET_MASK      ;// To be used in the loop, to compute offset

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero.
        ;// STRD writes the register pair starting at <Value> (r4, r5), so both
        ;// <Value> and <Value2> have to be zero here.
        ;//-----------------------------------------------------------------------

        MOV     Value,  #0                          ;// Initialize the zero value
        MOV     Value2, #0                          ;// Initialize the zero value
        LDRB    Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>

        STRD    Value, [pDst, #0]                   ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
        STRD    Value, [pDst, #8]                   ;// pDst[4]  = pDst[5]  = pDst[6]  = pDst[7]  = 0
        STRD    Value, [pDst, #16]                  ;// pDst[8]  = pDst[9]  = pDst[10] = pDst[11] = 0
        STRD    Value, [pDst, #24]                  ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0

        ;//----------------------------------------------------------------------------
        ;// The loop below parses and unpacks the input stream. The C-model has
        ;// a somewhat complicated logic for sign extension. But in the v6 version,
        ;// that can be easily taken care by loading the data from <pSrc> stream as
        ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or
        ;// 16-bits are read.
        ;//
        ;// Next, to compute the offset, where the unpacked value needs to be stored,
        ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
        ;// This results in a saving of one cycle.
        ;//
        ;// None of the instructions between a TST and its last conditional consumer
        ;// sets the flags, so each TST result stays valid for the whole group.
        ;//----------------------------------------------------------------------------

unpackLoop
        TST     Flag,  #UNPACK_WIDE_FLAG            ;// Computing (Flag & 0x10)
        LDRSBNE Value2,[pSrc,#1]                    ;// 16-bit: signed high byte; bytewise to avoid unaligned access
        LDRBNE  Value, [pSrc], #2                   ;// 16-bit: unsigned low byte, pSrc += 2
        AND     strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1
        LDRSBEQ Value, [pSrc], #1                   ;// 8-bit:  Value = (signed byte) *pSrc++
        ORRNE   Value,Value,Value2, LSL #8          ;// 16-bit: Value = low | (high << 8)

        TST     Flag,  #UNPACK_LAST_FLAG            ;// Computing (Flag & 0x20) to check, if we're done
        LDRBEQ  Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration
        STRH    Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at offset <strOffset>
        BEQ     unpackLoop                          ;// Branch to the loop beginning

        STR     pSrc, [ppSrc]                       ;// Update the bitstream pointer
        M_END

        ENDIF



        END
