1926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)/* 2926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged 3926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 4926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * Redistribution and use in source and binary forms, with or without 5926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * modification, are permitted provided that the following conditions 6926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * are met: 7926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 1. Redistributions of source code must retain the above copyright 8926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * notice, this list of conditions and the following disclaimer. 9926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright 10926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * notice, this list of conditions and the following disclaimer in the 11926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * documentation and/or other materials provided with the distribution. 12926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * 13926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY 14926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR 17926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) */ 25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 2609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#ifndef WebGLImageConversionNEON_h 2709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#define WebGLImageConversionNEON_h 28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 29926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#if HAVE(ARM_NEON_INTRINSICS) 30926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 31926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#include <arm_neon.h> 32926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 33c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink { 34926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 35926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)namespace SIMD { 36926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 37926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 38926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 39926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 40926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 16; 41926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 42926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) const uint8_t* src = reinterpret_cast<const uint8_t*>(source); 43926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 44926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 16) { 45926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x16x2_t components = vld2q_u8(src + i * 2); 46926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst1q_u8(destination + i, components.val[1]); 47926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 48926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 49926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 50926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize; 51926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 52926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 54926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 55926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 56926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 3; 57926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 24; 58926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 59926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 60926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vdup_n_u8(0xFF); 61926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 24) { 62926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8x3_t RGB16 = vld3q_u16(source + i); 63926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8)); 64926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8)); 65926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8)); 66926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 67926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination, RGBA8); 68926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += 32; 69926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 70926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 71926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 72926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 3; 73926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 74926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 75926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 76926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 77926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 78926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 32; 79926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 80926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 81926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 32) { 82926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8x4_t ARGB16 = vld4q_u16(source + i); 83926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); 84926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); 85926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); 86926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); 87926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 88926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination + i, RGBA8); 89926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 90926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 91926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 92926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize; 93926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 94926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 96926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 97926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 98926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 99926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 32; 100926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 101926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 102926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 32) { 103926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8x4_t ARGB16 = vld4q_u16(source + i); 104926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); 105926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); 106926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); 107926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); 108926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; 109926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination + i, RGBA8); 110926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 111926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 112926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 113926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize; 114926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 115926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 116926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 117926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 119926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailPixels = pixelsPerRow % 8; 120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned pixelSize = pixelsPerRow - tailPixels; 121926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 122926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t immediate0x0f = vdupq_n_u16(0x0F); 123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < pixelSize; i += 8) { 124926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t eightPixels = vld1q_u16(source + i); 125926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 126926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); 127926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), immediate0x0f)); 128926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), immediate0x0f)); 129926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f)); 130926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 131926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); 132926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG); 133926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB); 134926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA); 135926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 136926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 137926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination, destComponents); 138926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += 32; 139926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 140926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 141926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += pixelSize; 142926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailPixels; 143926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 145926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 146926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 147926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 148926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 32; 149926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 150926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 151926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 152926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0xf0 = vdup_n_u8(0xF0); 153926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 32) { 154926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = vld4_u8(source + i); 155926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 156926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0); 157926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4); 158926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0); 159926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4); 160926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 161926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x2_t RGBA4; 162926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGBA4.val[0] = vorr_u8(componentB, componentA); 163926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGBA4.val[1] = vorr_u8(componentR, componentG); 164926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst2_u8(dst, RGBA4); 165926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) dst += 16; 166926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 167926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 168926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 169926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize / 4; 170926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 171926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 172926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 173926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 174926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 175926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailPixels = pixelsPerRow % 8; 176926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned pixelSize = pixelsPerRow - tailPixels; 177926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 178926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0x7 = vdup_n_u8(0x7); 179926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0xff = vdup_n_u8(0xFF); 180926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); 181926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t immediate0x1 = vdupq_n_u16(0x1); 182926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 183926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < pixelSize; i += 8) { 184926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t eightPixels = vld1q_u16(source + i); 185926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 186926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); 187926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6), immediate0x1f)); 188926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1), immediate0x1f)); 189926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1)); 190926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 191926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); 192926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentG = vorr_u8(vshl_n_u8(componentG, 3), vand_u8(componentG, immediate0x7)); 193926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); 194926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentA = vmul_u8(componentA, immediate0xff); 195926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 196926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 197926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination, destComponents); 198926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += 32; 199926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 200926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 201926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += pixelSize; 202926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailPixels; 203926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 204926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 205926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 206926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 207926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 208926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 32; 209926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 210926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 211926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 212926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 213926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); 214926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0x18 = vdup_n_u8(0x18); 215926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 32) { 216926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = vld4_u8(source + i); 217926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 218926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); 219926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG3bit = vshr_n_u8(RGBA8.val[1], 5); 220926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 221926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG2bit = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x18), 3); 222926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 2); 223926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vshr_n_u8(RGBA8.val[3], 7); 224926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 225926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x2_t RGBA5551; 226926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGBA5551.val[0] = vorr_u8(vorr_u8(componentG2bit, componentB), componentA); 227926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); 228926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst2_u8(dst, RGBA5551); 229926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) dst += 16; 230926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 231926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 232926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 233926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize / 4; 234926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 235926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 236926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 237926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) 238926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 239926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailPixels = pixelsPerRow % 8; 240926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned pixelSize = pixelsPerRow - tailPixels; 241926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 242926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); 243926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); 244926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0x3 = vdup_n_u8(0x3); 245926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0x7 = vdup_n_u8(0x7); 246926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 247926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentA = vdup_n_u8(0xFF); 248926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 249926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < pixelSize; i += 8) { 250926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint16x8_t eightPixels = vld1q_u16(source + i); 251926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 252926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); 253926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5), immediate0x3f)); 254926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f)); 255926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 256926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); 257926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentG = vorr_u8(vshl_n_u8(componentG, 2), vand_u8(componentG, immediate0x3)); 258926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); 259926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 260926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; 261926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst4_u8(destination, destComponents); 262926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += 32; 263926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 264926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 265926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += pixelSize; 266926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailPixels; 267926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 268926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 269926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) 270926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles){ 271926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsPerRow = pixelsPerRow * 4; 272926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned tailComponents = componentsPerRow % 32; 273926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) unsigned componentsSize = componentsPerRow - tailComponents; 274926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8_t* dst = reinterpret_cast<uint8_t*>(destination); 275926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 276926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); 277926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t immediate0x1c = vdup_n_u8(0x1C); 278926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned i = 0; i < componentsSize; i += 32) { 279926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x4_t RGBA8 = vld4_u8(source + i); 280926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 281926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); 282926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentGLeft = vshr_n_u8(RGBA8.val[1], 5); 283926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentGRight = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x1c), 3); 284926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 3); 285926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 286926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) uint8x8x2_t RGB565; 287926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGB565.val[0] = vorr_u8(componentGRight, componentB); 288926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) RGB565.val[1] = vorr_u8(componentR, componentGLeft); 289926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) vst2_u8(dst, RGB565); 290926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) dst += 16; 291926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) } 292926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 293926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) source += componentsSize; 294926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) destination += componentsSize / 4; 295926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) pixelsPerRow = tailComponents / 4; 296926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} 297926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 298926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} // namespace SIMD 299926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 300c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)} // namespace blink 301926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 302926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif // HAVE(ARM_NEON_INTRINSICS) 303926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) 30409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#endif // WebGLImageConversionNEON_h 305