1/**************************************************************************** 2* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3* 4* Permission is hereby granted, free of charge, to any person obtaining a 5* copy of this software and associated documentation files (the "Software"), 6* to deal in the Software without restriction, including without limitation 7* the rights to use, copy, modify, merge, publish, distribute, sublicense, 8* and/or sell copies of the Software, and to permit persons to whom the 9* Software is furnished to do so, subject to the following conditions: 10* 11* The above copyright notice and this permission notice (including the next 12* paragraph) shall be included in all copies or substantial portions of the 13* Software. 14* 15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21* IN THE SOFTWARE. 22* 23* @file format_conversion.h 24* 25* @brief API implementation 26* 27******************************************************************************/ 28#include "format_types.h" 29#include "format_traits.h" 30 31////////////////////////////////////////////////////////////////////////// 32/// @brief Load SIMD packed pixels in SOA format and converts to 33/// SOA RGBA32_FLOAT format. 34/// @param pSrc - source data in SOA form 35/// @param dst - output data in SOA form 36template<SWR_FORMAT SrcFormat> 37INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst) 38{ 39 // fast path for float32 40 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32)) 41 { 42 auto lambda = [&](int comp) 43 { 44 simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar))); 45 46 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 47 }; 48 49 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 50 return; 51 } 52 53 auto lambda = [&](int comp) 54 { 55 // load SIMD components 56 simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc); 57 58 // unpack 59 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp); 60 61 // convert 62 if (FormatTraits<SrcFormat>::isNormalized(comp)) 63 { 64 vComp = _simd_cvtepi32_ps(_simd_castps_si(vComp)); 65 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<SrcFormat>::toFloat(comp))); 66 } 67 68 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 69 70 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8; 71 }; 72 73 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 74} 75 76////////////////////////////////////////////////////////////////////////// 77/// @brief Clamps the given component based on the requirements on the 78/// Format template arg 79/// @param vComp - SIMD vector of floats 80/// @param Component - component 81template<SWR_FORMAT Format> 82INLINE simdscalar Clamp(simdscalar vComp, uint32_t Component) 83{ 84 if (FormatTraits<Format>::isNormalized(Component)) 85 { 86 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) 87 { 88 vComp = _simd_max_ps(vComp, _simd_setzero_ps()); 89 } 90 91 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM) 92 { 93 vComp = _simd_max_ps(vComp, _simd_set1_ps(-1.0f)); 94 } 95 vComp = _simd_min_ps(vComp, _simd_set1_ps(1.0f)); 96 } 97 else if (FormatTraits<Format>::GetBPC(Component) < 32) 98 { 99 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT) 100 { 101 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1; 102 int iMin = 0; 103 simdscalari vCompi = _simd_castps_si(vComp); 104 vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin)); 105 vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax)); 106 vComp = _simd_castsi_ps(vCompi); 107 } 108 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT) 109 { 110 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1; 111 int iMin = -1 - iMax; 112 simdscalari vCompi = _simd_castps_si(vComp); 113 vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin)); 114 vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax)); 115 vComp = _simd_castsi_ps(vCompi); 116 } 117 } 118 119 return vComp; 120} 121 122////////////////////////////////////////////////////////////////////////// 123/// @brief Normalize the given component based on the requirements on the 124/// Format template arg 125/// @param vComp - SIMD vector of floats 126/// @param Component - component 127template<SWR_FORMAT Format> 128INLINE simdscalar Normalize(simdscalar vComp, uint32_t Component) 129{ 130 if (FormatTraits<Format>::isNormalized(Component)) 131 { 132 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component))); 133 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp)); 134 } 135 return vComp; 136} 137 138////////////////////////////////////////////////////////////////////////// 139/// @brief Convert and store simdvector of pixels in SOA 140/// RGBA32_FLOAT to SOA format 141/// @param src - source data in SOA form 142/// @param dst - output data in SOA form 143template<SWR_FORMAT DstFormat> 144INLINE void StoreSOA(const simdvector &src, uint8_t *pDst) 145{ 146 // fast path for float32 147 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32)) 148 { 149 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp) 150 { 151 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 152 153 // Gamma-correct 154 if (FormatTraits<DstFormat>::isSRGB) 155 { 156 if (comp < 3) // Input format is always RGBA32_FLOAT. 157 { 158 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 159 } 160 } 161 162 _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp); 163 } 164 return; 165 } 166 167 auto lambda = [&](int comp) 168 { 169 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 170 171 // Gamma-correct 172 if (FormatTraits<DstFormat>::isSRGB) 173 { 174 if (comp < 3) // Input format is always RGBA32_FLOAT. 175 { 176 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 177 } 178 } 179 180 // clamp 181 vComp = Clamp<DstFormat>(vComp, comp); 182 183 // normalize 184 vComp = Normalize<DstFormat>(vComp, comp); 185 186 // pack 187 vComp = FormatTraits<DstFormat>::pack(comp, vComp); 188 189 // store 190 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp); 191 192 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8; 193 }; 194 195 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda); 196} 197 198#if ENABLE_AVX512_SIMD16 199////////////////////////////////////////////////////////////////////////// 200/// @brief Load SIMD packed pixels in SOA format and converts to 201/// SOA RGBA32_FLOAT format. 202/// @param pSrc - source data in SOA form 203/// @param dst - output data in SOA form 204template<SWR_FORMAT SrcFormat> 205INLINE void LoadSOA(const uint8_t *pSrc, simd16vector &dst) 206{ 207 // fast path for float32 208 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32)) 209 { 210 auto lambda = [&](int comp) 211 { 212 simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar))); 213 214 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 215 }; 216 217 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 218 return; 219 } 220 221 auto lambda = [&](int comp) 222 { 223 // load SIMD components 224 simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc); 225 226 // unpack 227 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp); 228 229 // convert 230 if (FormatTraits<SrcFormat>::isNormalized(comp)) 231 { 232 vComp = _simd16_cvtepi32_ps(_simd16_castps_si(vComp)); 233 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<SrcFormat>::toFloat(comp))); 234 } 235 236 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp; 237 238 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8; 239 }; 240 241 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda); 242} 243 244////////////////////////////////////////////////////////////////////////// 245/// @brief Clamps the given component based on the requirements on the 246/// Format template arg 247/// @param vComp - SIMD vector of floats 248/// @param Component - component 249template<SWR_FORMAT Format> 250INLINE simd16scalar Clamp(simd16scalar vComp, uint32_t Component) 251{ 252 if (FormatTraits<Format>::isNormalized(Component)) 253 { 254 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM) 255 { 256 vComp = _simd16_max_ps(vComp, _simd16_setzero_ps()); 257 } 258 259 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM) 260 { 261 vComp = _simd16_max_ps(vComp, _simd16_set1_ps(-1.0f)); 262 } 263 vComp = _simd16_min_ps(vComp, _simd16_set1_ps(1.0f)); 264 } 265 else if (FormatTraits<Format>::GetBPC(Component) < 32) 266 { 267 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT) 268 { 269 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1; 270 int iMin = 0; 271 simd16scalari vCompi = _simd16_castps_si(vComp); 272 vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin)); 273 vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax)); 274 vComp = _simd16_castsi_ps(vCompi); 275 } 276 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT) 277 { 278 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1; 279 int iMin = -1 - iMax; 280 simd16scalari vCompi = _simd16_castps_si(vComp); 281 vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin)); 282 vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax)); 283 vComp = _simd16_castsi_ps(vCompi); 284 } 285 } 286 287 return vComp; 288} 289 290////////////////////////////////////////////////////////////////////////// 291/// @brief Normalize the given component based on the requirements on the 292/// Format template arg 293/// @param vComp - SIMD vector of floats 294/// @param Component - component 295template<SWR_FORMAT Format> 296INLINE simd16scalar Normalize(simd16scalar vComp, uint32_t Component) 297{ 298 if (FormatTraits<Format>::isNormalized(Component)) 299 { 300 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component))); 301 vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp)); 302 } 303 return vComp; 304} 305 306////////////////////////////////////////////////////////////////////////// 307/// @brief Convert and store simdvector of pixels in SOA 308/// RGBA32_FLOAT to SOA format 309/// @param src - source data in SOA form 310/// @param dst - output data in SOA form 311template<SWR_FORMAT DstFormat> 312INLINE void StoreSOA(const simd16vector &src, uint8_t *pDst) 313{ 314 // fast path for float32 315 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32)) 316 { 317 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp) 318 { 319 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 320 321 // Gamma-correct 322 if (FormatTraits<DstFormat>::isSRGB) 323 { 324 if (comp < 3) // Input format is always RGBA32_FLOAT. 325 { 326 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 327 } 328 } 329 330 _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp); 331 } 332 return; 333 } 334 335 auto lambda = [&](int comp) 336 { 337 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)]; 338 339 // Gamma-correct 340 if (FormatTraits<DstFormat>::isSRGB) 341 { 342 if (comp < 3) // Input format is always RGBA32_FLOAT. 343 { 344 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp); 345 } 346 } 347 348 // clamp 349 vComp = Clamp<DstFormat>(vComp, comp); 350 351 // normalize 352 vComp = Normalize<DstFormat>(vComp, comp); 353 354 // pack 355 vComp = FormatTraits<DstFormat>::pack(comp, vComp); 356 357 // store 358 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp); 359 360 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8; 361 }; 362 363 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda); 364} 365 366#endif 367