13a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett/* 23a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett * Copyright 2016 Google Inc. 33a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett * 43a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett * Use of this source code is governed by a BSD-style license that can be 53a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett * found in the LICENSE file. 63a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett */ 73a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 83a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett#ifndef SkSwizzler_opts_DEFINED 93a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett#define SkSwizzler_opts_DEFINED 103a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 11a4083c97d48e8a4f88e2797d7363f141e3d42553Cary Clark#include "SkColorData.h" 123a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 13e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 14e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein #include <immintrin.h> 15e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein#elif defined(SK_ARM_HAS_NEON) 16e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein #include <arm_neon.h> 17e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein#endif 18e18fa440e74e9af0324de0a1de9b6ffb0fe3c3d3mtklein 193a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarettnamespace SK_OPTS_NS { 203a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 218bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtkleinstatic void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) { 228bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 233a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett for (int i = 0; i < count; i++) { 243a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett uint8_t a = src[i] >> 24, 258bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein b = src[i] >> 16, 263a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett g = src[i] >> 8, 278bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = src[i] >> 0; 283a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett b = (b*a+127)/255; 298bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein g = (g*a+127)/255; 308bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = (r*a+127)/255; 313a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett dst[i] = (uint32_t)a << 24 328bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)b << 16 333a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett | (uint32_t)g << 8 348bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)r << 0; 353a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett } 363a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 373a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 388bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtkleinstatic void RGBA_to_bgrA_portable(uint32_t* dst, const void* vsrc, int count) { 398bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 403a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett for (int i = 0; i < count; i++) { 413a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett uint8_t a = src[i] >> 24, 428bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein b = src[i] >> 16, 433a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett g = src[i] >> 8, 448bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = src[i] >> 0; 453a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett b = (b*a+127)/255; 468bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein g = (g*a+127)/255; 478bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = (r*a+127)/255; 483a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett dst[i] = (uint32_t)a << 24 498bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)r << 16 503a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett | (uint32_t)g << 8 518bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)b << 0; 523a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett } 533a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 543a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 558bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtkleinstatic void RGBA_to_BGRA_portable(uint32_t* dst, const void* vsrc, int count) { 568bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 5703108de163354fa574679ad153b58ce57126b2bamsarett for (int i = 0; i < count; i++) { 5803108de163354fa574679ad153b58ce57126b2bamsarett uint8_t a = src[i] >> 24, 598bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein b = src[i] >> 16, 6003108de163354fa574679ad153b58ce57126b2bamsarett g = src[i] >> 8, 618bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = src[i] >> 0; 6203108de163354fa574679ad153b58ce57126b2bamsarett dst[i] = (uint32_t)a << 24 638bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)r << 16 6403108de163354fa574679ad153b58ce57126b2bamsarett | (uint32_t)g << 8 658bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein | (uint32_t)b << 0; 6603108de163354fa574679ad153b58ce57126b2bamsarett } 6703108de163354fa574679ad153b58ce57126b2bamsarett} 6803108de163354fa574679ad153b58ce57126b2bamsarett 69f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarettstatic void RGB_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { 70f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett const uint8_t* src = (const uint8_t*)vsrc; 71f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett for (int i = 0; i < count; i++) { 72f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8_t r = src[0], 73f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett g = src[1], 74f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett b = src[2]; 75f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett src += 3; 76f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett dst[i] = (uint32_t)0xFF << 24 77f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)b << 16 78f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)g << 8 79f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)r << 0; 80f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 81f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 82f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 83f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarettstatic void RGB_to_BGR1_portable(uint32_t dst[], const void* vsrc, int count) { 84f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett const uint8_t* src = (const uint8_t*)vsrc; 85f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett for (int i = 0; i < count; i++) { 86f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8_t r = src[0], 87f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett g = src[1], 88f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett b = src[2]; 89f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett src += 3; 90f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett dst[i] = (uint32_t)0xFF << 24 91f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)r << 16 92f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)g << 8 93f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett | (uint32_t)b << 0; 94f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 95f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 96f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 972eff71c9b5f984b58961e5a6b4e66774c4385224msarettstatic void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { 982eff71c9b5f984b58961e5a6b4e66774c4385224msarett const uint8_t* src = (const uint8_t*)vsrc; 992eff71c9b5f984b58961e5a6b4e66774c4385224msarett for (int i = 0; i < count; i++) { 1002eff71c9b5f984b58961e5a6b4e66774c4385224msarett dst[i] = (uint32_t)0xFF << 24 1012eff71c9b5f984b58961e5a6b4e66774c4385224msarett | (uint32_t)src[i] << 16 1022eff71c9b5f984b58961e5a6b4e66774c4385224msarett | (uint32_t)src[i] << 8 1032eff71c9b5f984b58961e5a6b4e66774c4385224msarett | (uint32_t)src[i] << 0; 1042eff71c9b5f984b58961e5a6b4e66774c4385224msarett } 1052eff71c9b5f984b58961e5a6b4e66774c4385224msarett} 1062eff71c9b5f984b58961e5a6b4e66774c4385224msarett 1071e06079b259d1091b735492b2f71d9897c14c608msarettstatic void grayA_to_RGBA_portable(uint32_t dst[], const void* vsrc, int count) { 1081e06079b259d1091b735492b2f71d9897c14c608msarett const uint8_t* src = (const uint8_t*)vsrc; 1091e06079b259d1091b735492b2f71d9897c14c608msarett for (int i = 0; i < count; i++) { 1101e06079b259d1091b735492b2f71d9897c14c608msarett uint8_t g = src[0], 1111e06079b259d1091b735492b2f71d9897c14c608msarett a = src[1]; 1121e06079b259d1091b735492b2f71d9897c14c608msarett src += 2; 1131e06079b259d1091b735492b2f71d9897c14c608msarett dst[i] = (uint32_t)a << 24 1141e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 16 1151e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 8 1161e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 0; 1171e06079b259d1091b735492b2f71d9897c14c608msarett } 1181e06079b259d1091b735492b2f71d9897c14c608msarett} 1191e06079b259d1091b735492b2f71d9897c14c608msarett 1201e06079b259d1091b735492b2f71d9897c14c608msarettstatic void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) { 1211e06079b259d1091b735492b2f71d9897c14c608msarett const uint8_t* src = (const uint8_t*)vsrc; 1221e06079b259d1091b735492b2f71d9897c14c608msarett for (int i = 0; i < count; i++) { 1231e06079b259d1091b735492b2f71d9897c14c608msarett uint8_t g = src[0], 1241e06079b259d1091b735492b2f71d9897c14c608msarett a = src[1]; 1251e06079b259d1091b735492b2f71d9897c14c608msarett src += 2; 1261e06079b259d1091b735492b2f71d9897c14c608msarett g = (g*a+127)/255; 1271e06079b259d1091b735492b2f71d9897c14c608msarett dst[i] = (uint32_t)a << 24 1281e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 16 1291e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 8 1301e06079b259d1091b735492b2f71d9897c14c608msarett | (uint32_t)g << 0; 1311e06079b259d1091b735492b2f71d9897c14c608msarett } 1321e06079b259d1091b735492b2f71d9897c14c608msarett} 1331e06079b259d1091b735492b2f71d9897c14c608msarett 134c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettstatic void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) { 135c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett const uint32_t* src = (const uint32_t*)vsrc; 136c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett for (int i = 0; i < count; i++) { 137c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8_t k = src[i] >> 24, 138c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett y = src[i] >> 16, 139c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett m = src[i] >> 8, 140c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett c = src[i] >> 0; 141c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // See comments in SkSwizzler.cpp for details on the conversion formula. 142c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8_t b = (y*k+127)/255, 143c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett g = (m*k+127)/255, 144c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett r = (c*k+127)/255; 145c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett dst[i] = (uint32_t)0xFF << 24 146c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) b << 16 147c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) g << 8 148c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) r << 0; 149c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 150c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 151c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 152c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettstatic void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) { 153c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett const uint32_t* src = (const uint32_t*)vsrc; 154c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett for (int i = 0; i < count; i++) { 155c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8_t k = src[i] >> 24, 156c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett y = src[i] >> 16, 157c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett m = src[i] >> 8, 158c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett c = src[i] >> 0; 159c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8_t b = (y*k+127)/255, 160c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett g = (m*k+127)/255, 161c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett r = (c*k+127)/255; 162c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett dst[i] = (uint32_t)0xFF << 24 163c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) r << 16 164c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) g << 8 165c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett | (uint32_t) b << 0; 166c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 167c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 168c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 1693a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett#if defined(SK_ARM_HAS_NEON) 1703a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 1713a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett// Rounded divide by 255, (x + 127) / 255 1723a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarettstatic uint8x8_t div255_round(uint16x8_t x) { 1733a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = (x + 127) / 255 1743a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = (x + 127) / 256 + error1 1753a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // 1763a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // error1 = (x + 127) / (255 * 256) 1773a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // error1 = (x + 127) / (256 * 256) + error2 1783a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // 1793a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // error2 = (x + 127) / (255 * 256 * 256) 1803a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // 1813a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // The maximum value of error2 is too small to matter. Thus: 1823a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = (x + 127) / 256 + (x + 127) / (256 * 256) 1833a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = ((x + 127) / 256 + x + 127) / 256 1843a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = ((x + 127) >> 8 + x + 127) >> 8 1853a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // 1863a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Use >>> to represent "rounded right shift" which, conveniently, 1873a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // NEON supports in one instruction. 1883a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // result = ((x >>> 8) + x) >>> 8 1893a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // 1903a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Note that the second right shift is actually performed as an 1913a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // "add, round, and narrow back to 8-bits" instruction. 1923a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett return vraddhn_u16(x, vrshrq_n_u16(x, 8)); 1933a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 1943a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 1953a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett// Scale a byte by another, (x * y + 127) / 255 1963a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarettstatic uint8x8_t scale(uint8x8_t x, uint8x8_t y) { 1973a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett return div255_round(vmull_u8(x, y)); 1983a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 1993a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 2003a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsaretttemplate <bool kSwapRB> 2018bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtkleinstatic void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { 2028bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 2033a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett while (count >= 8) { 2043a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Load 8 pixels. 205f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x8x4_t rgba = vld4_u8((const uint8_t*) src); 2063a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 207f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x8_t a = rgba.val[3], 208f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett b = rgba.val[2], 209f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett g = rgba.val[1], 210f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett r = rgba.val[0]; 2113a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 2123a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Premultiply. 2133a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett b = scale(b, a); 2148bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein g = scale(g, a); 2158bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein r = scale(r, a); 2163a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 2173a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Store 8 premultiplied pixels. 2183a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett if (kSwapRB) { 219f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = r; 220f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[1] = g; 221f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = b; 2228bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein } else { 223f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = b; 224f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[1] = g; 225f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = r; 2263a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett } 227f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett vst4_u8((uint8_t*) dst, rgba); 2283a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett src += 8; 2293a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett dst += 8; 2303a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett count -= 8; 2313a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett } 2323a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 2333a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett // Call portable code to finish up the tail of [0,8) pixels. 2348bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto proc = kSwapRB ? RGBA_to_bgrA_portable : RGBA_to_rgbA_portable; 2353a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett proc(dst, src, count); 2363a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 2373a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 238cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 2398bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein premul_should_swapRB<false>(dst, src, count); 2403a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 2413a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 242cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 2438bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein premul_should_swapRB<true>(dst, src, count); 2443a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 2453a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 246cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { 2478bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 24803108de163354fa574679ad153b58ce57126b2bamsarett while (count >= 16) { 24903108de163354fa574679ad153b58ce57126b2bamsarett // Load 16 pixels. 250f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x16x4_t rgba = vld4q_u8((const uint8_t*) src); 25103108de163354fa574679ad153b58ce57126b2bamsarett 25203108de163354fa574679ad153b58ce57126b2bamsarett // Swap r and b. 253f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett SkTSwap(rgba.val[0], rgba.val[2]); 25403108de163354fa574679ad153b58ce57126b2bamsarett 25503108de163354fa574679ad153b58ce57126b2bamsarett // Store 16 pixels. 256f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett vst4q_u8((uint8_t*) dst, rgba); 25703108de163354fa574679ad153b58ce57126b2bamsarett src += 16; 25803108de163354fa574679ad153b58ce57126b2bamsarett dst += 16; 25903108de163354fa574679ad153b58ce57126b2bamsarett count -= 16; 26003108de163354fa574679ad153b58ce57126b2bamsarett } 26103108de163354fa574679ad153b58ce57126b2bamsarett 26203108de163354fa574679ad153b58ce57126b2bamsarett if (count >= 8) { 26303108de163354fa574679ad153b58ce57126b2bamsarett // Load 8 pixels. 264f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x8x4_t rgba = vld4_u8((const uint8_t*) src); 26503108de163354fa574679ad153b58ce57126b2bamsarett 26603108de163354fa574679ad153b58ce57126b2bamsarett // Swap r and b. 267f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett SkTSwap(rgba.val[0], rgba.val[2]); 26803108de163354fa574679ad153b58ce57126b2bamsarett 26903108de163354fa574679ad153b58ce57126b2bamsarett // Store 8 pixels. 270f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett vst4_u8((uint8_t*) dst, rgba); 27103108de163354fa574679ad153b58ce57126b2bamsarett src += 8; 27203108de163354fa574679ad153b58ce57126b2bamsarett dst += 8; 27303108de163354fa574679ad153b58ce57126b2bamsarett count -= 8; 27403108de163354fa574679ad153b58ce57126b2bamsarett } 27503108de163354fa574679ad153b58ce57126b2bamsarett 2768bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein RGBA_to_BGRA_portable(dst, src, count); 27703108de163354fa574679ad153b58ce57126b2bamsarett} 27803108de163354fa574679ad153b58ce57126b2bamsarett 279f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsaretttemplate <bool kSwapRB> 280f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarettstatic void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { 281f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett const uint8_t* src = (const uint8_t*) vsrc; 282f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett while (count >= 16) { 283f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Load 16 pixels. 284f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x16x3_t rgb = vld3q_u8(src); 285f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 286f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Insert an opaque alpha channel and swap if needed. 287f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x16x4_t rgba; 288f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett if (kSwapRB) { 289f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = rgb.val[2]; 290f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = rgb.val[0]; 291f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } else { 292f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = rgb.val[0]; 293f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = rgb.val[2]; 294f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 295f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[1] = rgb.val[1]; 296f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[3] = vdupq_n_u8(0xFF); 297f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 298f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Store 16 pixels. 299f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett vst4q_u8((uint8_t*) dst, rgba); 300f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett src += 16*3; 301f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett dst += 16; 302f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett count -= 16; 303f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 304f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 305f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett if (count >= 8) { 306f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Load 8 pixels. 307f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x8x3_t rgb = vld3_u8(src); 308f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 309f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Insert an opaque alpha channel and swap if needed. 310f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett uint8x8x4_t rgba; 311f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett if (kSwapRB) { 312f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = rgb.val[2]; 313f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = rgb.val[0]; 314f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } else { 315f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[0] = rgb.val[0]; 316f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[2] = rgb.val[2]; 317f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 318f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[1] = rgb.val[1]; 319f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett rgba.val[3] = vdup_n_u8(0xFF); 320f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 321f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Store 8 pixels. 322f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett vst4_u8((uint8_t*) dst, rgba); 323f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett src += 8*3; 324f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett dst += 8; 325f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett count -= 8; 326f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett } 327f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 328f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett // Call portable code to finish up the tail of [0,8) pixels. 329f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; 330f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett proc(dst, src, count); 331f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 332f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 333cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 334f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett insert_alpha_should_swaprb<false>(dst, src, count); 335f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 336f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 337cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 338f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett insert_alpha_should_swaprb<true>(dst, src, count); 339f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 340f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 341cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) { 3422eff71c9b5f984b58961e5a6b4e66774c4385224msarett const uint8_t* src = (const uint8_t*) vsrc; 3432eff71c9b5f984b58961e5a6b4e66774c4385224msarett while (count >= 16) { 3442eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Load 16 pixels. 3452eff71c9b5f984b58961e5a6b4e66774c4385224msarett uint8x16_t gray = vld1q_u8(src); 3462eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3472eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Set each of the color channels. 3482eff71c9b5f984b58961e5a6b4e66774c4385224msarett uint8x16x4_t rgba; 3492eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[0] = gray; 3502eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[1] = gray; 3512eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[2] = gray; 3522eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[3] = vdupq_n_u8(0xFF); 3532eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3542eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Store 16 pixels. 3552eff71c9b5f984b58961e5a6b4e66774c4385224msarett vst4q_u8((uint8_t*) dst, rgba); 3562eff71c9b5f984b58961e5a6b4e66774c4385224msarett src += 16; 3572eff71c9b5f984b58961e5a6b4e66774c4385224msarett dst += 16; 3582eff71c9b5f984b58961e5a6b4e66774c4385224msarett count -= 16; 3592eff71c9b5f984b58961e5a6b4e66774c4385224msarett } 3602eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3612eff71c9b5f984b58961e5a6b4e66774c4385224msarett if (count >= 8) { 3622eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Load 8 pixels. 3632eff71c9b5f984b58961e5a6b4e66774c4385224msarett uint8x8_t gray = vld1_u8(src); 3642eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3652eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Set each of the color channels. 3662eff71c9b5f984b58961e5a6b4e66774c4385224msarett uint8x8x4_t rgba; 3672eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[0] = gray; 3682eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[1] = gray; 3692eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[2] = gray; 3702eff71c9b5f984b58961e5a6b4e66774c4385224msarett rgba.val[3] = vdup_n_u8(0xFF); 3712eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3722eff71c9b5f984b58961e5a6b4e66774c4385224msarett // Store 8 pixels. 3732eff71c9b5f984b58961e5a6b4e66774c4385224msarett vst4_u8((uint8_t*) dst, rgba); 3742eff71c9b5f984b58961e5a6b4e66774c4385224msarett src += 8; 3752eff71c9b5f984b58961e5a6b4e66774c4385224msarett dst += 8; 3762eff71c9b5f984b58961e5a6b4e66774c4385224msarett count -= 8; 3772eff71c9b5f984b58961e5a6b4e66774c4385224msarett } 3782eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3792eff71c9b5f984b58961e5a6b4e66774c4385224msarett gray_to_RGB1_portable(dst, src, count); 3802eff71c9b5f984b58961e5a6b4e66774c4385224msarett} 3812eff71c9b5f984b58961e5a6b4e66774c4385224msarett 3821e06079b259d1091b735492b2f71d9897c14c608msaretttemplate <bool kPremul> 3831e06079b259d1091b735492b2f71d9897c14c608msarettstatic void expand_grayA(uint32_t dst[], const void* vsrc, int count) { 3841e06079b259d1091b735492b2f71d9897c14c608msarett const uint8_t* src = (const uint8_t*) vsrc; 3851e06079b259d1091b735492b2f71d9897c14c608msarett while (count >= 16) { 3861e06079b259d1091b735492b2f71d9897c14c608msarett // Load 16 pixels. 3871e06079b259d1091b735492b2f71d9897c14c608msarett uint8x16x2_t ga = vld2q_u8(src); 3881e06079b259d1091b735492b2f71d9897c14c608msarett 3891e06079b259d1091b735492b2f71d9897c14c608msarett // Premultiply if requested. 3901e06079b259d1091b735492b2f71d9897c14c608msarett if (kPremul) { 3911e06079b259d1091b735492b2f71d9897c14c608msarett ga.val[0] = vcombine_u8( 3921e06079b259d1091b735492b2f71d9897c14c608msarett scale(vget_low_u8(ga.val[0]), vget_low_u8(ga.val[1])), 3931e06079b259d1091b735492b2f71d9897c14c608msarett scale(vget_high_u8(ga.val[0]), vget_high_u8(ga.val[1]))); 3941e06079b259d1091b735492b2f71d9897c14c608msarett } 3951e06079b259d1091b735492b2f71d9897c14c608msarett 3961e06079b259d1091b735492b2f71d9897c14c608msarett // Set each of the color channels. 3971e06079b259d1091b735492b2f71d9897c14c608msarett uint8x16x4_t rgba; 3981e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[0] = ga.val[0]; 3991e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[1] = ga.val[0]; 4001e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[2] = ga.val[0]; 4011e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[3] = ga.val[1]; 4021e06079b259d1091b735492b2f71d9897c14c608msarett 4031e06079b259d1091b735492b2f71d9897c14c608msarett // Store 16 pixels. 4041e06079b259d1091b735492b2f71d9897c14c608msarett vst4q_u8((uint8_t*) dst, rgba); 4051e06079b259d1091b735492b2f71d9897c14c608msarett src += 16*2; 4061e06079b259d1091b735492b2f71d9897c14c608msarett dst += 16; 4071e06079b259d1091b735492b2f71d9897c14c608msarett count -= 16; 4081e06079b259d1091b735492b2f71d9897c14c608msarett } 4091e06079b259d1091b735492b2f71d9897c14c608msarett 4101e06079b259d1091b735492b2f71d9897c14c608msarett if (count >= 8) { 4111e06079b259d1091b735492b2f71d9897c14c608msarett // Load 8 pixels. 4121e06079b259d1091b735492b2f71d9897c14c608msarett uint8x8x2_t ga = vld2_u8(src); 4131e06079b259d1091b735492b2f71d9897c14c608msarett 4141e06079b259d1091b735492b2f71d9897c14c608msarett // Premultiply if requested. 4151e06079b259d1091b735492b2f71d9897c14c608msarett if (kPremul) { 4161e06079b259d1091b735492b2f71d9897c14c608msarett ga.val[0] = scale(ga.val[0], ga.val[1]); 4171e06079b259d1091b735492b2f71d9897c14c608msarett } 4181e06079b259d1091b735492b2f71d9897c14c608msarett 4191e06079b259d1091b735492b2f71d9897c14c608msarett // Set each of the color channels. 4201e06079b259d1091b735492b2f71d9897c14c608msarett uint8x8x4_t rgba; 4211e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[0] = ga.val[0]; 4221e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[1] = ga.val[0]; 4231e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[2] = ga.val[0]; 4241e06079b259d1091b735492b2f71d9897c14c608msarett rgba.val[3] = ga.val[1]; 4251e06079b259d1091b735492b2f71d9897c14c608msarett 4261e06079b259d1091b735492b2f71d9897c14c608msarett // Store 8 pixels. 4271e06079b259d1091b735492b2f71d9897c14c608msarett vst4_u8((uint8_t*) dst, rgba); 4281e06079b259d1091b735492b2f71d9897c14c608msarett src += 8*2; 4291e06079b259d1091b735492b2f71d9897c14c608msarett dst += 8; 4301e06079b259d1091b735492b2f71d9897c14c608msarett count -= 8; 4311e06079b259d1091b735492b2f71d9897c14c608msarett } 4321e06079b259d1091b735492b2f71d9897c14c608msarett 4331e06079b259d1091b735492b2f71d9897c14c608msarett auto proc = kPremul ? grayA_to_rgbA_portable : grayA_to_RGBA_portable; 4341e06079b259d1091b735492b2f71d9897c14c608msarett proc(dst, src, count); 4351e06079b259d1091b735492b2f71d9897c14c608msarett} 4361e06079b259d1091b735492b2f71d9897c14c608msarett 437cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 4381e06079b259d1091b735492b2f71d9897c14c608msarett expand_grayA<false>(dst, src, count); 4391e06079b259d1091b735492b2f71d9897c14c608msarett} 4401e06079b259d1091b735492b2f71d9897c14c608msarett 441cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 4421e06079b259d1091b735492b2f71d9897c14c608msarett expand_grayA<true>(dst, src, count); 4431e06079b259d1091b735492b2f71d9897c14c608msarett} 4441e06079b259d1091b735492b2f71d9897c14c608msarett 445c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettenum Format { kRGB1, kBGR1 }; 446c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msaretttemplate <Format format> 447c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettstatic void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { 448c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett auto src = (const uint32_t*)vsrc; 449c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett while (count >= 8) { 450c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Load 8 cmyk pixels. 451c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8x8x4_t pixels = vld4_u8((const uint8_t*) src); 452c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 453c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8x8_t k = pixels.val[3], 454c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett y = pixels.val[2], 455c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett m = pixels.val[1], 456c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett c = pixels.val[0]; 457c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 458c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Scale to r, g, b. 459c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8x8_t b = scale(y, k); 460c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8x8_t g = scale(m, k); 461c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett uint8x8_t r = scale(c, k); 462c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 463c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Store 8 rgba pixels. 464c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett if (kBGR1 == format) { 465c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[3] = vdup_n_u8(0xFF); 466c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[2] = r; 467c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[1] = g; 468c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[0] = b; 469c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } else { 470c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[3] = vdup_n_u8(0xFF); 471c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[2] = b; 472c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[1] = g; 473c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett pixels.val[0] = r; 474c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 475c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett vst4_u8((uint8_t*) dst, pixels); 476c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett src += 8; 477c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett dst += 8; 478c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett count -= 8; 479c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 480c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 481c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; 482c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett proc(dst, src, count); 483c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 484c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 485cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { 486c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_cmyk_to<kRGB1>(dst, src, count); 487c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 488c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 489cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { 490c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_cmyk_to<kBGR1>(dst, src, count); 491c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 492c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 49353b9d29b973f2828624f097bf110f1c7acc4b593msarett#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 49453b9d29b973f2828624f097bf110f1c7acc4b593msarett 495095742419d0277a4fb0d499a05ff29b7506f1c5emsarett// Scale a byte by another. 496095742419d0277a4fb0d499a05ff29b7506f1c5emsarett// Inputs are stored in 16-bit lanes, but are not larger than 8-bits. 497095742419d0277a4fb0d499a05ff29b7506f1c5emsarettstatic __m128i scale(__m128i x, __m128i y) { 498095742419d0277a4fb0d499a05ff29b7506f1c5emsarett const __m128i _128 = _mm_set1_epi16(128); 499095742419d0277a4fb0d499a05ff29b7506f1c5emsarett const __m128i _257 = _mm_set1_epi16(257); 500095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 501095742419d0277a4fb0d499a05ff29b7506f1c5emsarett // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. 502095742419d0277a4fb0d499a05ff29b7506f1c5emsarett return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); 503095742419d0277a4fb0d499a05ff29b7506f1c5emsarett} 504095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 50553b9d29b973f2828624f097bf110f1c7acc4b593msaretttemplate <bool kSwapRB> 5068bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtkleinstatic void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { 5078bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 50853b9d29b973f2828624f097bf110f1c7acc4b593msarett 50953b9d29b973f2828624f097bf110f1c7acc4b593msarett auto premul8 = [](__m128i* lo, __m128i* hi) { 51053b9d29b973f2828624f097bf110f1c7acc4b593msarett const __m128i zeros = _mm_setzero_si128(); 51153b9d29b973f2828624f097bf110f1c7acc4b593msarett __m128i planar; 51253b9d29b973f2828624f097bf110f1c7acc4b593msarett if (kSwapRB) { 51353b9d29b973f2828624f097bf110f1c7acc4b593msarett planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); 51453b9d29b973f2828624f097bf110f1c7acc4b593msarett } else { 51553b9d29b973f2828624f097bf110f1c7acc4b593msarett planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); 51653b9d29b973f2828624f097bf110f1c7acc4b593msarett } 51753b9d29b973f2828624f097bf110f1c7acc4b593msarett 51853b9d29b973f2828624f097bf110f1c7acc4b593msarett // Swizzle the pixels to 8-bit planar. 5198bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein *lo = _mm_shuffle_epi8(*lo, planar); // rrrrgggg bbbbaaaa 5208bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein *hi = _mm_shuffle_epi8(*hi, planar); // RRRRGGGG BBBBAAAA 5218bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein __m128i rg = _mm_unpacklo_epi32(*lo, *hi), // rrrrRRRR ggggGGGG 5228bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein ba = _mm_unpackhi_epi32(*lo, *hi); // bbbbBBBB aaaaAAAA 52353b9d29b973f2828624f097bf110f1c7acc4b593msarett 52453b9d29b973f2828624f097bf110f1c7acc4b593msarett // Unpack to 16-bit planar. 5258bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein __m128i r = _mm_unpacklo_epi8(rg, zeros), // r_r_r_r_ R_R_R_R_ 5268bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein g = _mm_unpackhi_epi8(rg, zeros), // g_g_g_g_ G_G_G_G_ 5278bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein b = _mm_unpacklo_epi8(ba, zeros), // b_b_b_b_ B_B_B_B_ 5288bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein a = _mm_unpackhi_epi8(ba, zeros); // a_a_a_a_ A_A_A_A_ 52953b9d29b973f2828624f097bf110f1c7acc4b593msarett 530095742419d0277a4fb0d499a05ff29b7506f1c5emsarett // Premultiply! 531095742419d0277a4fb0d499a05ff29b7506f1c5emsarett r = scale(r, a); 532095742419d0277a4fb0d499a05ff29b7506f1c5emsarett g = scale(g, a); 533095742419d0277a4fb0d499a05ff29b7506f1c5emsarett b = scale(b, a); 53453b9d29b973f2828624f097bf110f1c7acc4b593msarett 53553b9d29b973f2828624f097bf110f1c7acc4b593msarett // Repack into interlaced pixels. 5368bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)); // rgrgrgrg RGRGRGRG 5378bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein ba = _mm_or_si128(b, _mm_slli_epi16(a, 8)); // babababa BABABABA 5388bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba 5398bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein *hi = _mm_unpackhi_epi16(rg, ba); // RGBARGBA RGBARGBA 54053b9d29b973f2828624f097bf110f1c7acc4b593msarett }; 54153b9d29b973f2828624f097bf110f1c7acc4b593msarett 54253b9d29b973f2828624f097bf110f1c7acc4b593msarett while (count >= 8) { 54353b9d29b973f2828624f097bf110f1c7acc4b593msarett __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), 54453b9d29b973f2828624f097bf110f1c7acc4b593msarett hi = _mm_loadu_si128((const __m128i*) (src + 4)); 54553b9d29b973f2828624f097bf110f1c7acc4b593msarett 54653b9d29b973f2828624f097bf110f1c7acc4b593msarett premul8(&lo, &hi); 54753b9d29b973f2828624f097bf110f1c7acc4b593msarett 54853b9d29b973f2828624f097bf110f1c7acc4b593msarett _mm_storeu_si128((__m128i*) (dst + 0), lo); 54953b9d29b973f2828624f097bf110f1c7acc4b593msarett _mm_storeu_si128((__m128i*) (dst + 4), hi); 55053b9d29b973f2828624f097bf110f1c7acc4b593msarett 55153b9d29b973f2828624f097bf110f1c7acc4b593msarett src += 8; 55253b9d29b973f2828624f097bf110f1c7acc4b593msarett dst += 8; 55353b9d29b973f2828624f097bf110f1c7acc4b593msarett count -= 8; 55453b9d29b973f2828624f097bf110f1c7acc4b593msarett } 55553b9d29b973f2828624f097bf110f1c7acc4b593msarett 55653b9d29b973f2828624f097bf110f1c7acc4b593msarett if (count >= 4) { 55753b9d29b973f2828624f097bf110f1c7acc4b593msarett __m128i lo = _mm_loadu_si128((const __m128i*) src), 55853b9d29b973f2828624f097bf110f1c7acc4b593msarett hi = _mm_setzero_si128(); 55953b9d29b973f2828624f097bf110f1c7acc4b593msarett 56053b9d29b973f2828624f097bf110f1c7acc4b593msarett premul8(&lo, &hi); 56153b9d29b973f2828624f097bf110f1c7acc4b593msarett 56253b9d29b973f2828624f097bf110f1c7acc4b593msarett _mm_storeu_si128((__m128i*) dst, lo); 56353b9d29b973f2828624f097bf110f1c7acc4b593msarett 56453b9d29b973f2828624f097bf110f1c7acc4b593msarett src += 4; 56553b9d29b973f2828624f097bf110f1c7acc4b593msarett dst += 4; 56653b9d29b973f2828624f097bf110f1c7acc4b593msarett count -= 4; 56753b9d29b973f2828624f097bf110f1c7acc4b593msarett } 56853b9d29b973f2828624f097bf110f1c7acc4b593msarett 56953b9d29b973f2828624f097bf110f1c7acc4b593msarett // Call portable code to finish up the tail of [0,4) pixels. 5708bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto proc = kSwapRB ? RGBA_to_bgrA_portable : RGBA_to_rgbA_portable; 57153b9d29b973f2828624f097bf110f1c7acc4b593msarett proc(dst, src, count); 57253b9d29b973f2828624f097bf110f1c7acc4b593msarett} 57353b9d29b973f2828624f097bf110f1c7acc4b593msarett 574cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 5758bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein premul_should_swapRB<false>(dst, src, count); 57653b9d29b973f2828624f097bf110f1c7acc4b593msarett} 57753b9d29b973f2828624f097bf110f1c7acc4b593msarett 578cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 5798bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein premul_should_swapRB<true>(dst, src, count); 58053b9d29b973f2828624f097bf110f1c7acc4b593msarett} 58153b9d29b973f2828624f097bf110f1c7acc4b593msarett 582cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) { 5838bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein auto src = (const uint32_t*)vsrc; 58453b9d29b973f2828624f097bf110f1c7acc4b593msarett const __m128i swapRB = _mm_setr_epi8(2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15); 58553b9d29b973f2828624f097bf110f1c7acc4b593msarett 58653b9d29b973f2828624f097bf110f1c7acc4b593msarett while (count >= 4) { 5878bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein __m128i rgba = _mm_loadu_si128((const __m128i*) src); 5888bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein __m128i bgra = _mm_shuffle_epi8(rgba, swapRB); 5898bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein _mm_storeu_si128((__m128i*) dst, bgra); 59053b9d29b973f2828624f097bf110f1c7acc4b593msarett 59153b9d29b973f2828624f097bf110f1c7acc4b593msarett src += 4; 59253b9d29b973f2828624f097bf110f1c7acc4b593msarett dst += 4; 59353b9d29b973f2828624f097bf110f1c7acc4b593msarett count -= 4; 59453b9d29b973f2828624f097bf110f1c7acc4b593msarett } 59553b9d29b973f2828624f097bf110f1c7acc4b593msarett 5968bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein RGBA_to_BGRA_portable(dst, src, count); 59753b9d29b973f2828624f097bf110f1c7acc4b593msarett} 59853b9d29b973f2828624f097bf110f1c7acc4b593msarett 59913aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsaretttemplate <bool kSwapRB> 60013aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarettstatic void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { 60113aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett const uint8_t* src = (const uint8_t*) vsrc; 60213aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 60313aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett const __m128i alphaMask = _mm_set1_epi32(0xFF000000); 60413aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett __m128i expand; 60513aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett const uint8_t X = 0xFF; // Used a placeholder. The value of X is irrelevant. 60613aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett if (kSwapRB) { 60713aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X); 60813aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett } else { 60913aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X); 61013aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett } 61113aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 61213aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett while (count >= 6) { 61313aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // Load a vector. While this actually contains 5 pixels plus an 61413aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // extra component, we will discard all but the first four pixels on 61513aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // this iteration. 61613aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett __m128i rgb = _mm_loadu_si128((const __m128i*) src); 61713aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 61813aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // Expand the first four pixels to RGBX and then mask to RGB(FF). 61913aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett __m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask); 62013aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 62113aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // Store 4 pixels. 62213aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett _mm_storeu_si128((__m128i*) dst, rgba); 62313aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 62413aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett src += 4*3; 62513aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett dst += 4; 62613aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett count -= 4; 62713aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett } 62813aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 62913aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett // Call portable code to finish up the tail of [0,4) pixels. 63013aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; 63113aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett proc(dst, src, count); 63213aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett} 63313aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett 634cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 63513aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett insert_alpha_should_swaprb<false>(dst, src, count); 636f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 637f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 638cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 63913aa1a5ad97156e35184970fc1ce1aaf3c50c91cmsarett insert_alpha_should_swaprb<true>(dst, src, count); 640f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 641f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 642cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) { 6430700651128f8c505da65e651f9788589593f07c4msarett const uint8_t* src = (const uint8_t*) vsrc; 6440700651128f8c505da65e651f9788589593f07c4msarett 6450700651128f8c505da65e651f9788589593f07c4msarett const __m128i alphas = _mm_set1_epi8((uint8_t) 0xFF); 6460700651128f8c505da65e651f9788589593f07c4msarett while (count >= 16) { 6470700651128f8c505da65e651f9788589593f07c4msarett __m128i grays = _mm_loadu_si128((const __m128i*) src); 6480700651128f8c505da65e651f9788589593f07c4msarett 6490700651128f8c505da65e651f9788589593f07c4msarett __m128i gg_lo = _mm_unpacklo_epi8(grays, grays); 6500700651128f8c505da65e651f9788589593f07c4msarett __m128i gg_hi = _mm_unpackhi_epi8(grays, grays); 6510700651128f8c505da65e651f9788589593f07c4msarett __m128i ga_lo = _mm_unpacklo_epi8(grays, alphas); 6520700651128f8c505da65e651f9788589593f07c4msarett __m128i ga_hi = _mm_unpackhi_epi8(grays, alphas); 6530700651128f8c505da65e651f9788589593f07c4msarett 6540700651128f8c505da65e651f9788589593f07c4msarett __m128i ggga0 = _mm_unpacklo_epi16(gg_lo, ga_lo); 6550700651128f8c505da65e651f9788589593f07c4msarett __m128i ggga1 = _mm_unpackhi_epi16(gg_lo, ga_lo); 6560700651128f8c505da65e651f9788589593f07c4msarett __m128i ggga2 = _mm_unpacklo_epi16(gg_hi, ga_hi); 6570700651128f8c505da65e651f9788589593f07c4msarett __m128i ggga3 = _mm_unpackhi_epi16(gg_hi, ga_hi); 6580700651128f8c505da65e651f9788589593f07c4msarett 6590700651128f8c505da65e651f9788589593f07c4msarett _mm_storeu_si128((__m128i*) (dst + 0), ggga0); 6600700651128f8c505da65e651f9788589593f07c4msarett _mm_storeu_si128((__m128i*) (dst + 4), ggga1); 6610700651128f8c505da65e651f9788589593f07c4msarett _mm_storeu_si128((__m128i*) (dst + 8), ggga2); 6620700651128f8c505da65e651f9788589593f07c4msarett _mm_storeu_si128((__m128i*) (dst + 12), ggga3); 6630700651128f8c505da65e651f9788589593f07c4msarett 6640700651128f8c505da65e651f9788589593f07c4msarett src += 16; 6650700651128f8c505da65e651f9788589593f07c4msarett dst += 16; 6660700651128f8c505da65e651f9788589593f07c4msarett count -= 16; 6670700651128f8c505da65e651f9788589593f07c4msarett } 6680700651128f8c505da65e651f9788589593f07c4msarett 6692eff71c9b5f984b58961e5a6b4e66774c4385224msarett gray_to_RGB1_portable(dst, src, count); 6702eff71c9b5f984b58961e5a6b4e66774c4385224msarett} 6712eff71c9b5f984b58961e5a6b4e66774c4385224msarett 672cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* vsrc, int count) { 673095742419d0277a4fb0d499a05ff29b7506f1c5emsarett const uint8_t* src = (const uint8_t*) vsrc; 674095742419d0277a4fb0d499a05ff29b7506f1c5emsarett while (count >= 8) { 675095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ga = _mm_loadu_si128((const __m128i*) src); 676095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 677095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i gg = _mm_or_si128(_mm_and_si128(ga, _mm_set1_epi16(0x00FF)), 678095742419d0277a4fb0d499a05ff29b7506f1c5emsarett _mm_slli_epi16(ga, 8)); 679095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 680095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ggga_lo = _mm_unpacklo_epi16(gg, ga); 681095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ggga_hi = _mm_unpackhi_epi16(gg, ga); 682095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 683095742419d0277a4fb0d499a05ff29b7506f1c5emsarett _mm_storeu_si128((__m128i*) (dst + 0), ggga_lo); 684095742419d0277a4fb0d499a05ff29b7506f1c5emsarett _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); 685095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 686095742419d0277a4fb0d499a05ff29b7506f1c5emsarett src += 8*2; 687095742419d0277a4fb0d499a05ff29b7506f1c5emsarett dst += 8; 688095742419d0277a4fb0d499a05ff29b7506f1c5emsarett count -= 8; 689095742419d0277a4fb0d499a05ff29b7506f1c5emsarett } 690095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 6911e06079b259d1091b735492b2f71d9897c14c608msarett grayA_to_RGBA_portable(dst, src, count); 6921e06079b259d1091b735492b2f71d9897c14c608msarett} 6931e06079b259d1091b735492b2f71d9897c14c608msarett 694cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* vsrc, int count) { 695095742419d0277a4fb0d499a05ff29b7506f1c5emsarett const uint8_t* src = (const uint8_t*) vsrc; 696095742419d0277a4fb0d499a05ff29b7506f1c5emsarett while (count >= 8) { 697095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i grayA = _mm_loadu_si128((const __m128i*) src); 698095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 699095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i g0 = _mm_and_si128(grayA, _mm_set1_epi16(0x00FF)); 700095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i a0 = _mm_srli_epi16(grayA, 8); 701095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 702095742419d0277a4fb0d499a05ff29b7506f1c5emsarett // Premultiply 703095742419d0277a4fb0d499a05ff29b7506f1c5emsarett g0 = scale(g0, a0); 704095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 705095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i gg = _mm_or_si128(g0, _mm_slli_epi16(g0, 8)); 706095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ga = _mm_or_si128(g0, _mm_slli_epi16(a0, 8)); 707095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 708095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 709095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ggga_lo = _mm_unpacklo_epi16(gg, ga); 710095742419d0277a4fb0d499a05ff29b7506f1c5emsarett __m128i ggga_hi = _mm_unpackhi_epi16(gg, ga); 711095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 712095742419d0277a4fb0d499a05ff29b7506f1c5emsarett _mm_storeu_si128((__m128i*) (dst + 0), ggga_lo); 713095742419d0277a4fb0d499a05ff29b7506f1c5emsarett _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); 714095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 715095742419d0277a4fb0d499a05ff29b7506f1c5emsarett src += 8*2; 716095742419d0277a4fb0d499a05ff29b7506f1c5emsarett dst += 8; 717095742419d0277a4fb0d499a05ff29b7506f1c5emsarett count -= 8; 718095742419d0277a4fb0d499a05ff29b7506f1c5emsarett } 719095742419d0277a4fb0d499a05ff29b7506f1c5emsarett 7201e06079b259d1091b735492b2f71d9897c14c608msarett grayA_to_rgbA_portable(dst, src, count); 7211e06079b259d1091b735492b2f71d9897c14c608msarett} 7221e06079b259d1091b735492b2f71d9897c14c608msarett 723c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettenum Format { kRGB1, kBGR1 }; 724c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msaretttemplate <Format format> 725c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarettstatic void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { 726c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett auto src = (const uint32_t*)vsrc; 727c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 728c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett auto convert8 = [](__m128i* lo, __m128i* hi) { 729c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett const __m128i zeros = _mm_setzero_si128(); 730c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i planar; 731c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett if (kBGR1 == format) { 732c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); 733c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } else { 734c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); 735c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 736c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 737c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Swizzle the pixels to 8-bit planar. 738c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk 739c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK 740c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM 741c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK 742c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 743c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Unpack to 16-bit planar. 744c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_ 745c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_ 746c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_ 747c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_ 748c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 749c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Scale to r, g, b. 750c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i r = scale(c, k), 751c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett g = scale(m, k), 752c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett b = scale(y, k); 753c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 754c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett // Repack into interlaced pixels. 755c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG 756c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett ba = _mm_or_si128(b, _mm_set1_epi16((uint16_t) 0xFF00)); // b1b1b1b1 B1B1B1B1 757c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba 758c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1 759c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett }; 760c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 761c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett while (count >= 8) { 762c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), 763c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett hi = _mm_loadu_si128((const __m128i*) (src + 4)); 764c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 765c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett convert8(&lo, &hi); 766c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 767c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett _mm_storeu_si128((__m128i*) (dst + 0), lo); 768c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett _mm_storeu_si128((__m128i*) (dst + 4), hi); 769c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 770c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett src += 8; 771c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett dst += 8; 772c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett count -= 8; 773c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 774c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 775c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett if (count >= 4) { 776c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett __m128i lo = _mm_loadu_si128((const __m128i*) src), 777c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett hi = _mm_setzero_si128(); 778c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 779c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett convert8(&lo, &hi); 780c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 781c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett _mm_storeu_si128((__m128i*) dst, lo); 782c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 783c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett src += 4; 784c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett dst += 4; 785c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett count -= 4; 786c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett } 787c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 788c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; 789c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett proc(dst, src, count); 790c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 791c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 792cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { 793c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_cmyk_to<kRGB1>(dst, src, count); 794c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 795c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 796cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { 797c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_cmyk_to<kBGR1>(dst, src, count); 798c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 799c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 8003a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett#else 8013a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 802cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 8038bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein RGBA_to_rgbA_portable(dst, src, count); 8043a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 8053a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 806cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 8078bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein RGBA_to_bgrA_portable(dst, src, count); 8083a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 8093a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 810cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { 8118bf7b79cf9776b4edb3f6810e5ab8c80c49d3480mtklein RGBA_to_BGRA_portable(dst, src, count); 8123a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 8133a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 814cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 815f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett RGB_to_RGB1_portable(dst, src, count); 816f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 817f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 818cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 819f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett RGB_to_BGR1_portable(dst, src, count); 820f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett} 821f1b8b6ae34e5a1f4b29e423401da39f88f0c117amsarett 822cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* src, int count) { 8232eff71c9b5f984b58961e5a6b4e66774c4385224msarett gray_to_RGB1_portable(dst, src, count); 8242eff71c9b5f984b58961e5a6b4e66774c4385224msarett} 8252eff71c9b5f984b58961e5a6b4e66774c4385224msarett 826cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 8271e06079b259d1091b735492b2f71d9897c14c608msarett grayA_to_RGBA_portable(dst, src, count); 8281e06079b259d1091b735492b2f71d9897c14c608msarett} 8291e06079b259d1091b735492b2f71d9897c14c608msarett 830cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 8311e06079b259d1091b735492b2f71d9897c14c608msarett grayA_to_rgbA_portable(dst, src, count); 8321e06079b259d1091b735492b2f71d9897c14c608msarett} 8331e06079b259d1091b735492b2f71d9897c14c608msarett 834cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { 835c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_CMYK_to_RGB1_portable(dst, src, count); 836c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 837c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 838cd71f115a846332d95b29fbeed3f315d8c01753dMike Klein/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { 839c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett inverted_CMYK_to_BGR1_portable(dst, src, count); 840c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett} 841c5c322d8ecfc05718f9f04360956c4f1f9dc33c1msarett 84203108de163354fa574679ad153b58ce57126b2bamsarett#endif 84303108de163354fa574679ad153b58ce57126b2bamsarett 8443a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett} 8453a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett 8463a24f459582f2665f0e66bd35a0d8f46a1c4c72fmsarett#endif // SkSwizzler_opts_DEFINED 847