/*
 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
2609380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#ifndef WebGLImageConversionNEON_h
2709380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#define WebGLImageConversionNEON_h
28926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
29926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#if HAVE(ARM_NEON_INTRINSICS)
30926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
31926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#include <arm_neon.h>
32926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
33c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)namespace blink {
34926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
35926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)namespace SIMD {
36926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Converts the vectorizable prefix of a row of 16-bit RGBA components to
// 8-bit RGBA by keeping the second byte of every 16-bit component (the most
// significant byte when the data is little-endian, as the name states).
//
// Sixteen components (four pixels) are handled per iteration. On return,
// |source| and |destination| have been advanced past the converted components
// and |pixelsPerRow| holds the number of pixels that were NOT converted.
ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 16;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8_t* inBytes = reinterpret_cast<const uint8_t*>(source);
    uint8_t* out = destination;
    uint8_t* const outEnd = destination + vectorComponents;

    while (out != outEnd) {
        // De-interleave 32 bytes: val[0] receives the even-indexed bytes,
        // val[1] the odd-indexed ones. Only the odd-indexed bytes are kept.
        const uint8x16x2_t evenOddBytes = vld2q_u8(inBytes);
        vst1q_u8(out, evenOddBytes.val[1]);
        inBytes += 32;
        out += 16;
    }

    source += vectorComponents;
    destination = outEnd;
    pixelsPerRow = leftoverComponents / 4;
}
53926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Converts the vectorizable prefix of a row of 16-bit RGB components to 8-bit
// RGBA. Each 16-bit component is reduced to its upper 8 bits and the alpha
// channel of every output pixel is set to 0xFF.
//
// Eight pixels (24 input components, 32 output bytes) are handled per
// iteration. On return, |source| and |destination| have been advanced past the
// converted data and |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 3;
    const unsigned leftoverComponents = totalComponents % 24;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);
    const uint16_t* in = source;
    const uint16_t* const inEnd = source + vectorComponents;

    for (; in != inEnd; in += 24, destination += 32) {
        // De-interleave eight pixels into separate R, G and B vectors.
        const uint16x8x3_t wide = vld3q_u16(in);

        uint8x8x4_t narrow;
        narrow.val[0] = vqmovn_u16(vshrq_n_u16(wide.val[0], 8));
        narrow.val[1] = vqmovn_u16(vshrq_n_u16(wide.val[1], 8));
        narrow.val[2] = vqmovn_u16(vshrq_n_u16(wide.val[2], 8));
        narrow.val[3] = opaqueAlpha;

        // Re-interleave as R, G, B, A bytes.
        vst4_u8(destination, narrow);
    }

    source = inEnd;
    pixelsPerRow = leftoverComponents / 3;
}
74926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Converts the vectorizable prefix of a row of 16-bit ARGB components to
// 8-bit RGBA. Each 16-bit component is reduced to its upper 8 bits, and the
// channels are reordered so that alpha moves from first to last position.
//
// Eight pixels (32 components) are handled per iteration. On return, |source|
// and |destination| have been advanced past the converted components and
// |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint16_t* in = source;
    uint8_t* out = destination;

    // vectorComponents is a multiple of 32, so the countdown reaches zero.
    for (unsigned remaining = vectorComponents; remaining; remaining -= 32) {
        // Lanes after de-interleaving: val[0]=A, val[1]=R, val[2]=G, val[3]=B.
        const uint16x8x4_t wide = vld4q_u16(in);

        uint8x8x4_t narrow;
        narrow.val[0] = vqmovn_u16(vshrq_n_u16(wide.val[1], 8)); // R
        narrow.val[1] = vqmovn_u16(vshrq_n_u16(wide.val[2], 8)); // G
        narrow.val[2] = vqmovn_u16(vshrq_n_u16(wide.val[3], 8)); // B
        narrow.val[3] = vqmovn_u16(vshrq_n_u16(wide.val[0], 8)); // A
        vst4_u8(out, narrow);

        in += 32;
        out += 32;
    }

    source = in;
    destination = out;
    pixelsPerRow = leftoverComponents / 4;
}
95926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Converts the vectorizable prefix of a row of 16-bit BGRA components to
// 8-bit RGBA. Each 16-bit component is reduced to its upper 8 bits, and the
// blue and red channels are swapped.
//
// Eight pixels (32 components) are handled per iteration. On return, |source|
// and |destination| have been advanced past the converted components and
// |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint16_t* in = source;
    const uint16_t* const inEnd = source + vectorComponents;
    uint8_t* out = destination;

    while (in != inEnd) {
        // Lanes after de-interleaving: val[0]=B, val[1]=G, val[2]=R, val[3]=A.
        const uint16x8x4_t BGRA16 = vld4q_u16(in);

        uint8x8x4_t RGBA8;
        RGBA8.val[0] = vqmovn_u16(vshrq_n_u16(BGRA16.val[2], 8)); // R
        RGBA8.val[1] = vqmovn_u16(vshrq_n_u16(BGRA16.val[1], 8)); // G
        RGBA8.val[2] = vqmovn_u16(vshrq_n_u16(BGRA16.val[0], 8)); // B
        RGBA8.val[3] = vqmovn_u16(vshrq_n_u16(BGRA16.val[3], 8)); // A
        vst4_u8(out, RGBA8);

        in += 32;
        out += 32;
    }

    source = inEnd;
    destination = out;
    pixelsPerRow = leftoverComponents / 4;
}
116926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Expands the vectorizable prefix of a row of packed 4-4-4-4 RGBA pixels
// (R in bits 15..12, G in 11..8, B in 7..4, A in 3..0 of each 16-bit value)
// to 8-bit RGBA. Each 4-bit channel is widened by replicating the nibble into
// both halves of the output byte.
//
// Eight pixels are handled per iteration. On return, |source| and
// |destination| have been advanced past the converted pixels and
// |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t nibbleMask = vdupq_n_u16(0x0F);
    const uint16_t* in = source;
    const uint16_t* const inEnd = source + vectorPixels;

    for (; in != inEnd; in += 8, destination += 32) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each 4-bit channel in the low nibble of a byte.
        const uint8x8_t red4 = vqmovn_u16(vshrq_n_u16(packed, 12));
        const uint8x8_t green4 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 8), nibbleMask));
        const uint8x8_t blue4 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 4), nibbleMask));
        const uint8x8_t alpha4 = vqmovn_u16(vandq_u16(packed, nibbleMask));

        // Duplicate the nibble into the high half: 0xN -> 0xNN.
        uint8x8x4_t expanded;
        expanded.val[0] = vorr_u8(vshl_n_u8(red4, 4), red4);
        expanded.val[1] = vorr_u8(vshl_n_u8(green4, 4), green4);
        expanded.val[2] = vorr_u8(vshl_n_u8(blue4, 4), blue4);
        expanded.val[3] = vorr_u8(vshl_n_u8(alpha4, 4), alpha4);
        vst4_u8(destination, expanded);
    }

    source = inEnd;
    pixelsPerRow = leftoverPixels;
}
144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Packs the vectorizable prefix of a row of 8-bit RGBA pixels into 16-bit
// 4-4-4-4 values, keeping the upper four bits of each channel. The output is
// written as byte pairs: first byte = (B high nibble | A >> 4), second byte =
// (R high nibble | G >> 4).
//
// Eight pixels (32 input bytes, 16 output bytes) are handled per iteration.
// On return, |source| and |destination| have been advanced past the converted
// pixels and |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t highNibbleMask = vdup_n_u8(0xF0);
    uint8_t* outBytes = reinterpret_cast<uint8_t*>(destination);
    const uint8_t* in = source;
    const uint8_t* const inEnd = source + vectorComponents;

    for (; in != inEnd; in += 32, outBytes += 16) {
        const uint8x8x4_t rgba = vld4_u8(in);

        // R and B stay in the high nibble; G and A move to the low nibble.
        const uint8x8_t redHigh = vand_u8(rgba.val[0], highNibbleMask);
        const uint8x8_t greenLow = vshr_n_u8(vand_u8(rgba.val[1], highNibbleMask), 4);
        const uint8x8_t blueHigh = vand_u8(rgba.val[2], highNibbleMask);
        const uint8x8_t alphaLow = vshr_n_u8(vand_u8(rgba.val[3], highNibbleMask), 4);

        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(blueHigh, alphaLow);
        packed.val[1] = vorr_u8(redHigh, greenLow);
        vst2_u8(outBytes, packed);
    }

    source = inEnd;
    // Four input components produce one 16-bit output value.
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
172926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Expands the vectorizable prefix of a row of packed 5-5-5-1 RGBA pixels
// (R in bits 15..11, G in 10..6, B in 5..1, A in bit 0 of each 16-bit value)
// to 8-bit RGBA. Each 5-bit colour channel c becomes (c << 3) | (c & 0x7);
// the single alpha bit becomes 0x00 or 0xFF.
//
// Eight pixels are handled per iteration. On return, |source| and
// |destination| have been advanced past the converted pixels and
// |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t fiveBitMask = vdupq_n_u16(0x1F);
    const uint16x8_t oneBitMask = vdupq_n_u16(0x1);
    const uint8x8_t lowThreeBits = vdup_n_u8(0x7);
    const uint8x8_t allOnes = vdup_n_u8(0xFF);

    const uint16_t* in = source;
    const uint16_t* const inEnd = source + vectorPixels;

    while (in != inEnd) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each channel in the low bits of a byte.
        const uint8x8_t red5 = vqmovn_u16(vshrq_n_u16(packed, 11));
        const uint8x8_t green5 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 6), fiveBitMask));
        const uint8x8_t blue5 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 1), fiveBitMask));
        const uint8x8_t alpha1 = vqmovn_u16(vandq_u16(packed, oneBitMask));

        uint8x8x4_t expanded;
        expanded.val[0] = vorr_u8(vshl_n_u8(red5, 3), vand_u8(red5, lowThreeBits));
        expanded.val[1] = vorr_u8(vshl_n_u8(green5, 3), vand_u8(green5, lowThreeBits));
        expanded.val[2] = vorr_u8(vshl_n_u8(blue5, 3), vand_u8(blue5, lowThreeBits));
        // 0 * 0xFF == 0x00, 1 * 0xFF == 0xFF.
        expanded.val[3] = vmul_u8(alpha1, allOnes);
        vst4_u8(destination, expanded);

        in += 8;
        destination += 32;
    }

    source = inEnd;
    pixelsPerRow = leftoverPixels;
}
204926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Packs the vectorizable prefix of a row of 8-bit RGBA pixels into 16-bit
// 5-5-5-1 values, keeping the upper five bits of R, G and B and the top bit
// of A. The output is written as byte pairs:
//   first byte  = G bits 4..3 (moved to bits 7..6) | B top five bits >> 2 | A >> 7
//   second byte = R top five bits | G bits 7..5 (moved to bits 2..0)
//
// Eight pixels (32 input bytes, 16 output bytes) are handled per iteration.
// On return, |source| and |destination| have been advanced past the converted
// pixels and |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t topFiveBits = vdup_n_u8(0xF8);
    const uint8x8_t greenMiddleBits = vdup_n_u8(0x18);

    uint8_t* outBytes = reinterpret_cast<uint8_t*>(destination);
    const uint8_t* in = source;

    // vectorComponents is a multiple of 32, so the countdown reaches zero.
    for (unsigned remaining = vectorComponents; remaining; remaining -= 32) {
        const uint8x8x4_t rgba = vld4_u8(in);

        const uint8x8_t red = vand_u8(rgba.val[0], topFiveBits);
        // Green straddles the two output bytes: three bits in one, two in the other.
        const uint8x8_t greenUpper3 = vshr_n_u8(rgba.val[1], 5);
        const uint8x8_t greenLower2 = vshl_n_u8(vand_u8(rgba.val[1], greenMiddleBits), 3);
        const uint8x8_t blue = vshr_n_u8(vand_u8(rgba.val[2], topFiveBits), 2);
        const uint8x8_t alpha = vshr_n_u8(rgba.val[3], 7);

        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(vorr_u8(greenLower2, blue), alpha);
        packed.val[1] = vorr_u8(red, greenUpper3);
        vst2_u8(outBytes, packed);

        in += 32;
        outBytes += 16;
    }

    source = in;
    // Four input components produce one 16-bit output value.
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
236926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Expands the vectorizable prefix of a row of packed 5-6-5 RGB pixels
// (R in bits 15..11, G in 10..5, B in 4..0 of each 16-bit value) to 8-bit
// RGBA. R and B become (c << 3) | (c & 0x7), G becomes (g << 2) | (g & 0x3),
// and alpha is set to 0xFF.
//
// Eight pixels are handled per iteration. On return, |source| and
// |destination| have been advanced past the converted pixels and
// |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned leftoverPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - leftoverPixels;

    const uint16x8_t sixBitMask = vdupq_n_u16(0x3F);
    const uint16x8_t fiveBitMask = vdupq_n_u16(0x1F);
    const uint8x8_t lowTwoBits = vdup_n_u8(0x3);
    const uint8x8_t lowThreeBits = vdup_n_u8(0x7);
    const uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);

    const uint16_t* in = source;
    const uint16_t* const inEnd = source + vectorPixels;

    for (; in != inEnd; in += 8, destination += 32) {
        const uint16x8_t packed = vld1q_u16(in);

        // Isolate each channel in the low bits of a byte.
        const uint8x8_t red5 = vqmovn_u16(vshrq_n_u16(packed, 11));
        const uint8x8_t green6 = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 5), sixBitMask));
        const uint8x8_t blue5 = vqmovn_u16(vandq_u16(packed, fiveBitMask));

        uint8x8x4_t expanded;
        expanded.val[0] = vorr_u8(vshl_n_u8(red5, 3), vand_u8(red5, lowThreeBits));
        expanded.val[1] = vorr_u8(vshl_n_u8(green6, 2), vand_u8(green6, lowTwoBits));
        expanded.val[2] = vorr_u8(vshl_n_u8(blue5, 3), vand_u8(blue5, lowThreeBits));
        expanded.val[3] = opaqueAlpha;
        vst4_u8(destination, expanded);
    }

    source = inEnd;
    pixelsPerRow = leftoverPixels;
}
268926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
// Packs the vectorizable prefix of a row of 8-bit RGBA pixels into 16-bit
// 5-6-5 values, keeping the upper five bits of R and B and the upper six bits
// of G; alpha is not used. The output is written as byte pairs:
//   first byte  = G bits 4..2 (moved to bits 7..5) | B top five bits >> 3
//   second byte = R top five bits | G bits 7..5 (moved to bits 2..0)
//
// Eight pixels (32 input bytes, 16 output bytes) are handled per iteration.
// On return, |source| and |destination| have been advanced past the converted
// pixels and |pixelsPerRow| holds the number of unconverted pixels.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned leftoverComponents = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - leftoverComponents;

    const uint8x8_t topFiveBits = vdup_n_u8(0xF8);
    const uint8x8_t greenMiddleBits = vdup_n_u8(0x1C);

    uint8_t* outBytes = reinterpret_cast<uint8_t*>(destination);
    const uint8_t* in = source;
    const uint8_t* const inEnd = source + vectorComponents;

    while (in != inEnd) {
        const uint8x8x4_t rgba = vld4_u8(in);

        const uint8x8_t red = vand_u8(rgba.val[0], topFiveBits);
        // Green straddles the two output bytes, three bits in each.
        const uint8x8_t greenUpper3 = vshr_n_u8(rgba.val[1], 5);
        const uint8x8_t greenLower3 = vshl_n_u8(vand_u8(rgba.val[1], greenMiddleBits), 3);
        const uint8x8_t blue = vshr_n_u8(vand_u8(rgba.val[2], topFiveBits), 3);

        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(greenLower3, blue);
        packed.val[1] = vorr_u8(red, greenUpper3);
        vst2_u8(outBytes, packed);

        in += 32;
        outBytes += 16;
    }

    source = inEnd;
    // Four input components produce one 16-bit output value.
    destination += vectorComponents / 4;
    pixelsPerRow = leftoverComponents / 4;
}
297926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
298926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)} // namespace SIMD
299926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
300c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)} // namespace blink
301926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
302926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)#endif // HAVE(ARM_NEON_INTRINSICS)
303926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)
30409380295ba73501a205346becac22c6978e4671dTorne (Richard Coles)#endif // WebGLImageConversionNEON_h
305