SkColor_opts_SSE2.h revision 25f7455f3a7cf2c440509bead85486079f1e4b31
1/* 2 * Copyright 2014 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#ifndef SkColor_opts_SSE2_DEFINED 9#define SkColor_opts_SSE2_DEFINED 10 11#include <emmintrin.h> 12 13// See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. 14static inline __m128i SkAlphaMulAlpha_SSE2(__m128i a, __m128i b) { 15 __m128i prod = _mm_mullo_epi16(a, b); 16 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); 17 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); 18 prod = _mm_srli_epi32(prod, 8); 19 20 return prod; 21} 22 23static inline __m128i SkGetPackedA32_SSE2(__m128i src) { 24 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); 25 return _mm_srli_epi32(a, 24); 26} 27 28static inline __m128i SkGetPackedR32_SSE2(__m128i src) { 29 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); 30 return _mm_srli_epi32(r, 24); 31} 32 33static inline __m128i SkGetPackedG32_SSE2(__m128i src) { 34 __m128i g = _mm_slli_epi32(src, (24 - SK_G32_SHIFT)); 35 return _mm_srli_epi32(g, 24); 36} 37 38static inline __m128i SkGetPackedB32_SSE2(__m128i src) { 39 __m128i b = _mm_slli_epi32(src, (24 - SK_B32_SHIFT)); 40 return _mm_srli_epi32(b, 24); 41} 42 43static inline __m128i SkMul16ShiftRound_SSE2(__m128i a, 44 __m128i b, int shift) { 45 __m128i prod = _mm_mullo_epi16(a, b); 46 prod = _mm_add_epi16(prod, _mm_set1_epi16(1 << (shift - 1))); 47 prod = _mm_add_epi16(prod, _mm_srli_epi16(prod, shift)); 48 prod = _mm_srli_epi16(prod, shift); 49 50 return prod; 51} 52 53static inline __m128i SkPackRGB16_SSE2(__m128i r, __m128i g, __m128i b) { 54 r = _mm_slli_epi16(r, SK_R16_SHIFT); 55 g = _mm_slli_epi16(g, SK_G16_SHIFT); 56 b = _mm_slli_epi16(b, SK_B16_SHIFT); 57 58 __m128i c = _mm_or_si128(r, g); 59 return _mm_or_si128(c, b); 60} 61 62static inline __m128i SkPackARGB32_SSE2(__m128i a, __m128i r, 63 __m128i g, __m128i b) { 64 a = _mm_slli_epi32(a, SK_A32_SHIFT); 65 r = _mm_slli_epi32(r, SK_R32_SHIFT); 66 g = _mm_slli_epi32(g, SK_G32_SHIFT); 67 b = _mm_slli_epi32(b, SK_B32_SHIFT); 68 69 __m128i c = _mm_or_si128(a, r); 70 c = _mm_or_si128(c, g); 71 return _mm_or_si128(c, b); 72} 73 74static inline __m128i SkPacked16ToR32_SSE2(__m128i src) { 75 __m128i r = _mm_srli_epi32(src, SK_R16_SHIFT); 76 r = _mm_and_si128(r, _mm_set1_epi32(SK_R16_MASK)); 77 r = _mm_or_si128(_mm_slli_epi32(r, (8 - SK_R16_BITS)), 78 _mm_srli_epi32(r, (2 * SK_R16_BITS - 8))); 79 80 return r; 81} 82 83static inline __m128i SkPacked16ToG32_SSE2(__m128i src) { 84 __m128i g = _mm_srli_epi32(src, SK_G16_SHIFT); 85 g = _mm_and_si128(g, _mm_set1_epi32(SK_G16_MASK)); 86 g = _mm_or_si128(_mm_slli_epi32(g, (8 - SK_G16_BITS)), 87 _mm_srli_epi32(g, (2 * SK_G16_BITS - 8))); 88 89 return g; 90} 91 92static inline __m128i SkPacked16ToB32_SSE2(__m128i src) { 93 __m128i b = _mm_srli_epi32(src, SK_B16_SHIFT); 94 b = _mm_and_si128(b, _mm_set1_epi32(SK_B16_MASK)); 95 b = _mm_or_si128(_mm_slli_epi32(b, (8 - SK_B16_BITS)), 96 _mm_srli_epi32(b, (2 * SK_B16_BITS - 8))); 97 98 return b; 99} 100 101static inline __m128i SkPixel16ToPixel32_SSE2(__m128i src) { 102 __m128i r = SkPacked16ToR32_SSE2(src); 103 __m128i g = SkPacked16ToG32_SSE2(src); 104 __m128i b = SkPacked16ToB32_SSE2(src); 105 106 return SkPackARGB32_SSE2(_mm_set1_epi32(0xFF), r, g, b); 107} 108 109static inline __m128i SkPixel32ToPixel16_ToU16_SSE2(__m128i src_pixel1, 110 __m128i src_pixel2) { 111 // Calculate result r. 112 __m128i r1 = _mm_srli_epi32(src_pixel1, 113 SK_R32_SHIFT + (8 - SK_R16_BITS)); 114 r1 = _mm_and_si128(r1, _mm_set1_epi32(SK_R16_MASK)); 115 __m128i r2 = _mm_srli_epi32(src_pixel2, 116 SK_R32_SHIFT + (8 - SK_R16_BITS)); 117 r2 = _mm_and_si128(r2, _mm_set1_epi32(SK_R16_MASK)); 118 __m128i r = _mm_packs_epi32(r1, r2); 119 120 // Calculate result g. 121 __m128i g1 = _mm_srli_epi32(src_pixel1, 122 SK_G32_SHIFT + (8 - SK_G16_BITS)); 123 g1 = _mm_and_si128(g1, _mm_set1_epi32(SK_G16_MASK)); 124 __m128i g2 = _mm_srli_epi32(src_pixel2, 125 SK_G32_SHIFT + (8 - SK_G16_BITS)); 126 g2 = _mm_and_si128(g2, _mm_set1_epi32(SK_G16_MASK)); 127 __m128i g = _mm_packs_epi32(g1, g2); 128 129 // Calculate result b. 130 __m128i b1 = _mm_srli_epi32(src_pixel1, 131 SK_B32_SHIFT + (8 - SK_B16_BITS)); 132 b1 = _mm_and_si128(b1, _mm_set1_epi32(SK_B16_MASK)); 133 __m128i b2 = _mm_srli_epi32(src_pixel2, 134 SK_B32_SHIFT + (8 - SK_B16_BITS)); 135 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); 136 __m128i b = _mm_packs_epi32(b1, b2); 137 138 // Store 8 16-bit colors in dst. 139 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); 140 141 return d_pixel; 142} 143 144#endif // SkColor_opts_SSE2_DEFINED 145