ih264_mem_fns_ssse3.c revision 8d3d303c7942ced6a987a52db8977d768dc3605f
1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20/**
21 *******************************************************************************
22 * @file
23 *  ih264_mem_fns_ssse3.c
24 *
25 * @brief
26 *  Functions used for memory operations
27 *
28 * @author
29 *  Ittiam
30 *
31 * @par List of Functions:
32 *
33 * @remarks
34 *  None
35 *
36 *******************************************************************************
37 */
38
39/*****************************************************************************/
40/* File Includes                                                             */
41/*****************************************************************************/
42#include <stdio.h>
43#include <stddef.h>
44#include <stdlib.h>
45#include <string.h>
46#include <assert.h>
47
48#include "ih264_typedefs.h"
49#include "ih264_mem_fns.h"
50
51#include <immintrin.h>
52
53/**
54 *******************************************************************************
55 *
56 * @brief
57 *   memcpy of a 8,16 or 32 bytes
58 *
59 * @par Description:
60 *   Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
61 *
62 * @param[in] pu1_dst
63 *  UWORD8 pointer to the destination
64 *
65 * @param[in] pu1_src
66 *  UWORD8 pointer to the source
67 *
68 * @param[in] num_bytes
69 *  number of bytes to copy
70 * @returns
71 *
72 * @remarks
73 *  None
74 *
75 *******************************************************************************
76 */
77
78
79
80
81void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
82{
83    int col;
84    for(col = num_bytes; col >= 8; col -= 8)
85    {
86        __m128i src_temp16x8b;
87        src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
88        pu1_src += 8;
89        _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
90        pu1_dst += 8;
91    }
92}
93
94/**
95 *******************************************************************************
96 *
97 * @brief
98 *   memset of a 8,16 or 32 bytes
99 *
100 * @par Description:
101 *   Does memset of 8bit data for 8,16 or 32 number of bytes
102 *
103 * @param[in] pu1_dst
104 *  UWORD8 pointer to the destination
105 *
106 * @param[in] value
107 *  UWORD8 value used for memset
108 *
109 * @param[in] num_bytes
110 *  number of bytes to set
111 * @returns
112 *
113 * @remarks
114 *  None
115 *
116 *******************************************************************************
117 */
118
119
120void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
121{
122    int col;
123    __m128i src_temp16x8b;
124    src_temp16x8b = _mm_set1_epi8(value);
125    for(col = num_bytes; col >= 8; col -= 8)
126    {
127        _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
128        pu1_dst += 8;
129    }
130}
131
132/**
133 *******************************************************************************
134 *
135 * @brief
136 *   memset of 16bit data of a 8,16 or 32 bytes
137 *
138 * @par Description:
139 *   Does memset of 16bit data for 8,16 or 32 number of bytes
140 *
141 * @param[in] pu2_dst
142 *  UWORD8 pointer to the destination
143 *
144 * @param[in] value
145 *  UWORD16 value used for memset
146 *
147 * @param[in] num_words
148 *  number of words to set
149 * @returns
150 *
151 * @remarks
152 *  None
153 *
154 *******************************************************************************
155 */
156
157
158void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
159{
160    int col;
161    __m128i src_temp16x8b;
162    src_temp16x8b = _mm_set1_epi16(value);
163    for(col = num_words; col >= 8; col -= 8)
164    {
165        _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b);
166        pu2_dst += 8;
167    }
168}
169
170