;// omxVCCOMM_Copy16x16_s.s
;/**
; * Function: omxVCCOMM_Copy16x16
; *
; * Description:
; * Copies the reference 16x16 block to the current block.
; *
; * Parameters:
; * [in]  pSrc        - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
; * [in]  step        - distance between the starts of consecutive lines in the reference frame, in bytes;
; *                     must be a multiple of 16 and must be greater than or equal to 16.
; * [out] pDst        - pointer to the destination block; must be aligned on a 16-byte boundary.
; *
; * Return Value:
; * OMX_Sts_NoErr     - no error
; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
; *                   - one or more of the following pointers is NULL: pSrc, pDst
; *                   - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst
; *                   - step < 16, or step is not a multiple of 16.
; */
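
;// Usage sketch (C caller side). The prototype below follows the OpenMAX DL omxVC
;// API (omxVC.h); the buffer/stride names (refBlock, dstBlock, frameStep) are
;// illustrative only:
;//
;//   OMXResult omxVCCOMM_Copy16x16(const OMX_U8 *pSrc, OMX_U8 *pDst, OMX_INT step);
;//
;//   // refBlock: 16-byte-aligned pointer into the reference frame, rows frameStep bytes apart.
;//   // dstBlock: 16-byte-aligned, contiguous 16x16 (256-byte) destination buffer.
;//   OMXResult res = omxVCCOMM_Copy16x16(refBlock, dstBlock, frameStep);
;//   if (res != OMX_Sts_NoErr) {
;//       // OMX_Sts_BadArgErr: NULL/misaligned pointer, or step < 16 / not a multiple of 16
;//   }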

        INCLUDE omxtypes_s.h


        M_VARIANTS CortexA8

        IF CortexA8


;// Input Arguments
pSrc    RN 0
pDst    RN 1
step    RN 2

;// Local Variables
Return  RN 0
;// Neon Registers

X0      DN D0.S8
X1      DN D1.S8
X2      DN D2.S8
X3      DN D3.S8
X4      DN D4.S8
X5      DN D5.S8
X6      DN D6.S8
X7      DN D7.S8

        M_START omxVCCOMM_Copy16x16


        VLD1  {X0,X1},[pSrc@128],step       ;// Rows 0-3: each VLD1 loads one 16-byte row from 16-byte-aligned pSrc, then advances pSrc by step
        VLD1  {X2,X3},[pSrc@128],step
        VLD1  {X4,X5},[pSrc@128],step
        VLD1  {X6,X7},[pSrc@128],step

        VST1  {X0,X1,X2,X3},[pDst@128]!     ;// Each VST1 stores two 16-byte rows to 16-byte-aligned pDst; destination rows are contiguous
        VST1  {X4,X5,X6,X7},[pDst@128]!


        VLD1  {X0,X1},[pSrc@128],step       ;// Rows 4-7
        VLD1  {X2,X3},[pSrc@128],step
        VLD1  {X4,X5},[pSrc@128],step
        VLD1  {X6,X7},[pSrc@128],step

        VST1  {X0,X1,X2,X3},[pDst@128]!
        VST1  {X4,X5,X6,X7},[pDst@128]!


        VLD1  {X0,X1},[pSrc@128],step       ;// Rows 8-11
        VLD1  {X2,X3},[pSrc@128],step
        VLD1  {X4,X5},[pSrc@128],step
        VLD1  {X6,X7},[pSrc@128],step

        VST1  {X0,X1,X2,X3},[pDst@128]!
        VST1  {X4,X5,X6,X7},[pDst@128]!


        VLD1  {X0,X1},[pSrc@128],step       ;// Rows 12-15
        VLD1  {X2,X3},[pSrc@128],step
        VLD1  {X4,X5},[pSrc@128],step
        VLD1  {X6,X7},[pSrc@128],step

        VST1  {X0,X1,X2,X3},[pDst@128]!
        VST1  {X4,X5,X6,X7},[pDst@128]!


        MOV   Return,#OMX_Sts_NoErr         ;// Set return value (Return aliases r0) to OMX_Sts_NoErr


        M_END
        ENDIF


        END