ih264_mem_fns_ssse3.c revision 8d3d303c7942ced6a987a52db8977d768dc3605f
1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20/** 21 ******************************************************************************* 22 * @file 23 * ih264_mem_fns_atom_intr.c 24 * 25 * @brief 26 * Functions used for memory operations 27 * 28 * @author 29 * Ittiam 30 * 31 * @par List of Functions: 32 * 33 * @remarks 34 * None 35 * 36 ******************************************************************************* 37 */ 38 39/*****************************************************************************/ 40/* File Includes */ 41/*****************************************************************************/ 42#include <stdio.h> 43#include <stddef.h> 44#include <stdlib.h> 45#include <string.h> 46#include <assert.h> 47 48#include "ih264_typedefs.h" 49#include "ih264_mem_fns.h" 50 51#include <immintrin.h> 52 53/** 54 ******************************************************************************* 55 * 56 * @brief 57 * memcpy of a 8,16 or 32 bytes 58 * 59 * @par Description: 60 * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes 61 * 62 * @param[in] pu1_dst 63 * UWORD8 pointer to the destination 64 * 65 * @param[in] pu1_src 66 * UWORD8 pointer to the source 67 * 68 * @param[in] num_bytes 69 * number of bytes to copy 70 * @returns 71 * 72 * @remarks 73 * None 74 * 75 ******************************************************************************* 76 */ 77 78 79 80 81void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) 82{ 83 int col; 84 for(col = num_bytes; col >= 8; col -= 8) 85 { 86 __m128i src_temp16x8b; 87 src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src)); 88 pu1_src += 8; 89 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); 90 pu1_dst += 8; 91 } 92} 93 94/** 95 ******************************************************************************* 96 * 97 * @brief 98 * memset of a 8,16 or 32 bytes 99 * 100 * @par Description: 101 * Does memset of 8bit data for 8,16 or 32 number of bytes 102 * 103 * @param[in] pu1_dst 104 * UWORD8 pointer to the destination 105 * 106 * @param[in] value 107 * UWORD8 value used for memset 108 * 109 * @param[in] num_bytes 110 * number of bytes to set 111 * @returns 112 * 113 * @remarks 114 * None 115 * 116 ******************************************************************************* 117 */ 118 119 120void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) 121{ 122 int col; 123 __m128i src_temp16x8b; 124 src_temp16x8b = _mm_set1_epi8(value); 125 for(col = num_bytes; col >= 8; col -= 8) 126 { 127 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); 128 pu1_dst += 8; 129 } 130} 131 132/** 133 ******************************************************************************* 134 * 135 * @brief 136 * memset of 16bit data of a 8,16 or 32 bytes 137 * 138 * @par Description: 139 * Does memset of 16bit data for 8,16 or 32 number of bytes 140 * 141 * @param[in] pu2_dst 142 * UWORD8 pointer to the destination 143 * 144 * @param[in] value 145 * UWORD16 value used for memset 146 * 147 * @param[in] num_words 148 * number of words to set 149 * @returns 150 * 151 * @remarks 152 * None 153 * 154 ******************************************************************************* 155 */ 156 157 158void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words) 159{ 160 int col; 161 __m128i src_temp16x8b; 162 src_temp16x8b = _mm_set1_epi16(value); 163 for(col = num_words; col >= 8; col -= 8) 164 { 165 _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b); 166 pu2_dst += 8; 167 } 168} 169 170