lp_bld_format_yuv.c revision 461c34c0cb0e23f11fd9390a37a62d9930a1c48e
1fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton/**************************************************************************
2e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton *
3fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * Copyright 2010 VMware, Inc.
4fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * All Rights Reserved.
5e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton *
6fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * Permission is hereby granted, free of charge, to any person obtaining a
7fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * copy of this software and associated documentation files (the
8fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * "Software"), to deal in the Software without restriction, including
9fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * without limitation the rights to use, copy, modify, merge, publish,
10fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * distribute, sub license, and/or sell copies of the Software, and to
11fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * permit persons to whom the Software is furnished to do so, subject to
12fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * the following conditions:
13e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton *
14fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * USE OR OTHER DEALINGS IN THE SOFTWARE.
21fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton *
22fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * The above copyright notice and this permission notice (including the
23fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * next paragraph) shall be included in all copies or substantial portions
24fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * of the Software.
25e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton *
26fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton **************************************************************************/
27fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton
28c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton
29b48676672592271597d07e5ece79cf4d3ffbe04bChristian König/**
302ef8c60e558938686196bf8ff4d22fd57903bf4cCooper Yuan * @file
31e44c85637a3298918e292e9ddba812856cf92924Younes Manton * YUV pixel format manipulation.
32b48676672592271597d07e5ece79cf4d3ffbe04bChristian König *
337ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch * @author Jose Fonseca <jfonseca@vmware.com>
347ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch */
357ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch
367ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch
37b48676672592271597d07e5ece79cf4d3ffbe04bChristian König#include "util/u_format.h"
387ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch#include "util/u_cpu_detect.h"
39b48676672592271597d07e5ece79cf4d3ffbe04bChristian König
407ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch#include "lp_bld_arit.h"
41fc0a5e21d77ae2f082fd19dd2295e84f6fb7bd3bChristian König#include "lp_bld_type.h"
42b48676672592271597d07e5ece79cf4d3ffbe04bChristian König#include "lp_bld_const.h"
43e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_conv.h"
44c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton#include "lp_bld_gather.h"
45e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_format.h"
46e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_init.h"
4770c44073ad3f333ed40c5c297a934a359c839e94Younes Manton#include "lp_bld_logic.h"
485eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton
495eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton/**
505eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton * Extract Y, U, V channels from packed UYVY.
51c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton * @param packed  is a <n x i32> vector with the packed UYVY blocks
52e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
53e44c85637a3298918e292e9ddba812856cf92924Younes Manton */
54e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic void
55e44c85637a3298918e292e9ddba812856cf92924Younes Mantonuyvy_to_yuv_soa(struct gallivm_state *gallivm,
568b02f9e67b83e40019d6b07b9a035ba5d5042688Christian König                unsigned n,
57e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef packed,
58e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef i,
59e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef *y,
60e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef *u,
61e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef *v)
62e44c85637a3298918e292e9ddba812856cf92924Younes Manton{
63e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMBuilderRef builder = gallivm->builder;
6470c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   struct lp_type type;
655eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   LLVMValueRef mask;
665eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton
67e44c85637a3298918e292e9ddba812856cf92924Younes Manton   memset(&type, 0, sizeof type);
68e44c85637a3298918e292e9ddba812856cf92924Younes Manton   type.width = 32;
69e44c85637a3298918e292e9ddba812856cf92924Younes Manton   type.length = n;
70d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton
71e44c85637a3298918e292e9ddba812856cf92924Younes Manton   assert(lp_check_value(type, packed));
72e44c85637a3298918e292e9ddba812856cf92924Younes Manton   assert(lp_check_value(type, i));
73e44c85637a3298918e292e9ddba812856cf92924Younes Manton
74e44c85637a3298918e292e9ddba812856cf92924Younes Manton   /*
75d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton    * y = (uyvy >> (16*i + 8)) & 0xff
76d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton    * u = (uyvy        ) & 0xff
77e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * v = (uyvy >> 16  ) & 0xff
78e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
79e44c85637a3298918e292e9ddba812856cf92924Younes Manton
80e44c85637a3298918e292e9ddba812856cf92924Younes Manton#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
81e44c85637a3298918e292e9ddba812856cf92924Younes Manton   /*
82e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * Avoid shift with per-element count.
83e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * No support on x86, gets translated to roughly 5 instructions
84e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * per element. Didn't measure performance but cuts shader size
85d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton    * by quite a bit (less difference if cpu has no sse4.1 support).
86e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
87e44c85637a3298918e292e9ddba812856cf92924Younes Manton   if (util_cpu_caps.has_sse2 && n == 4) {
88e44c85637a3298918e292e9ddba812856cf92924Younes Manton      LLVMValueRef sel, tmp, tmp2;
89e44c85637a3298918e292e9ddba812856cf92924Younes Manton      struct lp_build_context bld32;
90d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton
91e44c85637a3298918e292e9ddba812856cf92924Younes Manton      lp_build_context_init(&bld32, gallivm, type);
92e44c85637a3298918e292e9ddba812856cf92924Younes Manton
93e44c85637a3298918e292e9ddba812856cf92924Younes Manton      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
94e44c85637a3298918e292e9ddba812856cf92924Younes Manton      tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
95e44c85637a3298918e292e9ddba812856cf92924Younes Manton      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
96e44c85637a3298918e292e9ddba812856cf92924Younes Manton      *y = lp_build_select(&bld32, sel, tmp, tmp2);
97e44c85637a3298918e292e9ddba812856cf92924Younes Manton   } else
98e44c85637a3298918e292e9ddba812856cf92924Younes Manton#endif
9970c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   {
1005eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton      LLVMValueRef shift;
1015eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
102e44c85637a3298918e292e9ddba812856cf92924Younes Manton      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
1038580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton      *y = LLVMBuildLShr(builder, packed, shift, "");
1045eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   }
1058580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
1065eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   *u = packed;
1078580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
1088580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
1098580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   mask = lp_build_const_int_vec(gallivm, type, 0xff);
1105eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton
1115eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   *y = LLVMBuildAnd(builder, *y, mask, "y");
1125eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   *u = LLVMBuildAnd(builder, *u, mask, "u");
1135eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   *v = LLVMBuildAnd(builder, *v, mask, "v");
114e44c85637a3298918e292e9ddba812856cf92924Younes Manton}
115e44c85637a3298918e292e9ddba812856cf92924Younes Manton
116d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton
117e44c85637a3298918e292e9ddba812856cf92924Younes Manton/**
118e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Extract Y, U, V channels from packed YUYV.
119e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param packed  is a <n x i32> vector with the packed YUYV blocks
120e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
121e44c85637a3298918e292e9ddba812856cf92924Younes Manton */
122e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic void
1238580b7a0eeed3fc29320b2c0a184084e4267661aYounes Mantonyuyv_to_yuv_soa(struct gallivm_state *gallivm,
1248580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton                unsigned n,
125e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef packed,
1268580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton                LLVMValueRef i,
1278580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton                LLVMValueRef *y,
1288580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton                LLVMValueRef *u,
129e44c85637a3298918e292e9ddba812856cf92924Younes Manton                LLVMValueRef *v)
1308580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton{
1318580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   LLVMBuilderRef builder = gallivm->builder;
1328580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   struct lp_type type;
1338580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   LLVMValueRef mask;
134e44c85637a3298918e292e9ddba812856cf92924Younes Manton
1358580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   memset(&type, 0, sizeof type);
1368580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   type.width = 32;
1378580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   type.length = n;
138e44c85637a3298918e292e9ddba812856cf92924Younes Manton
1398580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   assert(lp_check_value(type, packed));
140e44c85637a3298918e292e9ddba812856cf92924Younes Manton   assert(lp_check_value(type, i));
141e44c85637a3298918e292e9ddba812856cf92924Younes Manton
142c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton   /*
143c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton    * y = (yuyv >> 16*i) & 0xff
144e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * u = (yuyv >> 8   ) & 0xff
1456858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton    * v = (yuyv >> 24  ) & 0xff
146e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
147e44c85637a3298918e292e9ddba812856cf92924Younes Manton
148e44c85637a3298918e292e9ddba812856cf92924Younes Manton#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
149e44c85637a3298918e292e9ddba812856cf92924Younes Manton   /*
150e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * Avoid shift with per-element count.
151e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * No support on x86, gets translated to roughly 5 instructions
152e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * per element. Didn't measure performance but cuts shader size
153e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * by quite a bit (less difference if cpu has no sse4.1 support).
154e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton    */
155e44c85637a3298918e292e9ddba812856cf92924Younes Manton   if (util_cpu_caps.has_sse2 && n == 4) {
156e44c85637a3298918e292e9ddba812856cf92924Younes Manton      LLVMValueRef sel, tmp;
1578580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton      struct lp_build_context bld32;
1588580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
159e44c85637a3298918e292e9ddba812856cf92924Younes Manton      lp_build_context_init(&bld32, gallivm, type);
1606858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton
1616858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
162e44c85637a3298918e292e9ddba812856cf92924Younes Manton      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
1636858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton       *y = lp_build_select(&bld32, sel, packed, tmp);
164d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton   } else
165e44c85637a3298918e292e9ddba812856cf92924Younes Manton#endif
166e44c85637a3298918e292e9ddba812856cf92924Younes Manton   {
167e44c85637a3298918e292e9ddba812856cf92924Younes Manton      LLVMValueRef shift;
168e44c85637a3298918e292e9ddba812856cf92924Younes Manton      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
169e44c85637a3298918e292e9ddba812856cf92924Younes Manton      *y = LLVMBuildLShr(builder, packed, shift, "");
170e44c85637a3298918e292e9ddba812856cf92924Younes Manton   }
171e44c85637a3298918e292e9ddba812856cf92924Younes Manton
172e44c85637a3298918e292e9ddba812856cf92924Younes Manton   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
173e44c85637a3298918e292e9ddba812856cf92924Younes Manton   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
174e44c85637a3298918e292e9ddba812856cf92924Younes Manton
1758580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   mask = lp_build_const_int_vec(gallivm, type, 0xff);
1768580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
177e44c85637a3298918e292e9ddba812856cf92924Younes Manton   *y = LLVMBuildAnd(builder, *y, mask, "y");
1786858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton   *u = LLVMBuildAnd(builder, *u, mask, "u");
1796858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton   *v = LLVMBuildAnd(builder, *v, mask, "v");
1803107b54b011c7ceef2b314632bdcf0b87c5e4d36Younes Manton}
181e44c85637a3298918e292e9ddba812856cf92924Younes Manton
182e44c85637a3298918e292e9ddba812856cf92924Younes Manton
183c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Mantonstatic INLINE void
184e44c85637a3298918e292e9ddba812856cf92924Younes Mantonyuv_to_rgb_soa(struct gallivm_state *gallivm,
1853d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König               unsigned n,
1863d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
1873d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
1883d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König{
1893d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König   LLVMBuilderRef builder = gallivm->builder;
1903d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König   struct lp_type type;
191e44c85637a3298918e292e9ddba812856cf92924Younes Manton   struct lp_build_context bld;
192e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton
1931448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   LLVMValueRef c0;
194e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef c8;
19514766f820069ca987543918bce96410c481e5d20Christian König   LLVMValueRef c16;
196e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef c128;
1978580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   LLVMValueRef c255;
1988580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
199e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef cy;
200e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef cug;
201e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef cub;
202e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef cvr;
203e44c85637a3298918e292e9ddba812856cf92924Younes Manton   LLVMValueRef cvg;
204e44c85637a3298918e292e9ddba812856cf92924Younes Manton
2055eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   memset(&type, 0, sizeof type);
2065eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   type.sign = TRUE;
207e44c85637a3298918e292e9ddba812856cf92924Younes Manton   type.width = 32;
208e44c85637a3298918e292e9ddba812856cf92924Younes Manton   type.length = n;
209e44c85637a3298918e292e9ddba812856cf92924Younes Manton
210e44c85637a3298918e292e9ddba812856cf92924Younes Manton   lp_build_context_init(&bld, gallivm, type);
211e44c85637a3298918e292e9ddba812856cf92924Younes Manton
21270c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   assert(lp_check_value(type, y));
21370c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   assert(lp_check_value(type, u));
2148580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   assert(lp_check_value(type, v));
21570c44073ad3f333ed40c5c297a934a359c839e94Younes Manton
21670c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   /*
217fcdf50f74befad8d89eb3f9cdfd88b82d1daa98cChristian König    * Constants
21842c7291d2cb50c2bd94dd9346a8402a24303d66dChristian König    */
21970c44073ad3f333ed40c5c297a934a359c839e94Younes Manton
22070c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   c0   = lp_build_const_int_vec(gallivm, type,   0);
2213cbe27a9888b94d1ab24b5e76ebd7563a7d8c6b8Christian König   c8   = lp_build_const_int_vec(gallivm, type,   8);
2223cbe27a9888b94d1ab24b5e76ebd7563a7d8c6b8Christian König   c16  = lp_build_const_int_vec(gallivm, type,  16);
22370c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   c128 = lp_build_const_int_vec(gallivm, type, 128);
22470c44073ad3f333ed40c5c297a934a359c839e94Younes Manton   c255 = lp_build_const_int_vec(gallivm, type, 255);
22570c44073ad3f333ed40c5c297a934a359c839e94Younes Manton
226e44c85637a3298918e292e9ddba812856cf92924Younes Manton   cy  = lp_build_const_int_vec(gallivm, type,  298);
227e44c85637a3298918e292e9ddba812856cf92924Younes Manton   cug = lp_build_const_int_vec(gallivm, type, -100);
228e44c85637a3298918e292e9ddba812856cf92924Younes Manton   cub = lp_build_const_int_vec(gallivm, type,  516);
229e44c85637a3298918e292e9ddba812856cf92924Younes Manton   cvr = lp_build_const_int_vec(gallivm, type,  409);
230e44c85637a3298918e292e9ddba812856cf92924Younes Manton   cvg = lp_build_const_int_vec(gallivm, type, -208);
231e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton
232e44c85637a3298918e292e9ddba812856cf92924Younes Manton   /*
233e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton    *  y -= 16;
2348580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton    *  u -= 128;
235e44c85637a3298918e292e9ddba812856cf92924Younes Manton    *  v -= 128;
236e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
237e44c85637a3298918e292e9ddba812856cf92924Younes Manton
238e44c85637a3298918e292e9ddba812856cf92924Younes Manton   y = LLVMBuildSub(builder, y, c16, "");
2391448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   u = LLVMBuildSub(builder, u, c128, "");
2401448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   v = LLVMBuildSub(builder, v, c128, "");
2418580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
242e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton   /*
243e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * r = 298 * _y            + 409 * _v + 128;
244e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * g = 298 * _y - 100 * _u - 208 * _v + 128;
245e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * b = 298 * _y + 516 * _u            + 128;
246e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
2471448e829e86981e6144410ba6a3d0f16357fb2b3Christian König
248ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König   y = LLVMBuildMul(builder, y, cy, "");
2491448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   y = LLVMBuildAdd(builder, y, c128, "");
250ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König
251ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König   *r = LLVMBuildMul(builder, v, cvr, "");
2528c2bfa34a0d70ab08de44e3b091b3a097abbad97Christian König   *g = LLVMBuildAdd(builder,
2538c2bfa34a0d70ab08de44e3b091b3a097abbad97Christian König                     LLVMBuildMul(builder, u, cug, ""),
254ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König                     LLVMBuildMul(builder, v, cvg, ""),
255d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König                     "");
256d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König   *b = LLVMBuildMul(builder, u, cub, "");
257d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König
2581448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   *r = LLVMBuildAdd(builder, *r, y, "");
259d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König   *g = LLVMBuildAdd(builder, *g, y, "");
260d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König   *b = LLVMBuildAdd(builder, *b, y, "");
261d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König
262d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König   /*
263d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König    * r >>= 8;
2641448e829e86981e6144410ba6a3d0f16357fb2b3Christian König    * g >>= 8;
265e5f78a74f8294ee02015552db664dae1e7da9f47Christian König    * b >>= 8;
266d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König    */
2671448e829e86981e6144410ba6a3d0f16357fb2b3Christian König
268e5f78a74f8294ee02015552db664dae1e7da9f47Christian König   *r = LLVMBuildAShr(builder, *r, c8, "r");
269e5f78a74f8294ee02015552db664dae1e7da9f47Christian König   *g = LLVMBuildAShr(builder, *g, c8, "g");
270e5f78a74f8294ee02015552db664dae1e7da9f47Christian König   *b = LLVMBuildAShr(builder, *b, c8, "b");
271e5f78a74f8294ee02015552db664dae1e7da9f47Christian König
272e5f78a74f8294ee02015552db664dae1e7da9f47Christian König   /*
27332c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König    * Clamp
27432c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König    */
27532c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König
27632c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König   *r = lp_build_clamp(&bld, *r, c0, c255);
27732c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König   *g = lp_build_clamp(&bld, *g, c0, c255);
27832c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König   *b = lp_build_clamp(&bld, *b, c0, c255);
27932c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König}
28032c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König
28132c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König
28232c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian Königstatic LLVMValueRef
28332c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian Königrgb_to_rgba_aos(struct gallivm_state *gallivm,
284c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König                unsigned n,
285c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König                LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
286c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König{
287c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König   LLVMBuilderRef builder = gallivm->builder;
288c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König   struct lp_type type;
28962db9b21da6ccad6301feae9b90d53d46224c854Younes Manton   LLVMValueRef a;
29062db9b21da6ccad6301feae9b90d53d46224c854Younes Manton   LLVMValueRef rgba;
291c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König
29214766f820069ca987543918bce96410c481e5d20Christian König   memset(&type, 0, sizeof type);
29362db9b21da6ccad6301feae9b90d53d46224c854Younes Manton   type.sign = TRUE;
29414766f820069ca987543918bce96410c481e5d20Christian König   type.width = 32;
29562db9b21da6ccad6301feae9b90d53d46224c854Younes Manton   type.length = n;
2961448e829e86981e6144410ba6a3d0f16357fb2b3Christian König
2971448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   assert(lp_check_value(type, r));
2985eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   assert(lp_check_value(type, g));
2995eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton   assert(lp_check_value(type, b));
300e44c85637a3298918e292e9ddba812856cf92924Younes Manton
301e44c85637a3298918e292e9ddba812856cf92924Younes Manton   /*
302e44c85637a3298918e292e9ddba812856cf92924Younes Manton    * Make a 4 x unorm8 vector
303e44c85637a3298918e292e9ddba812856cf92924Younes Manton    */
304e44c85637a3298918e292e9ddba812856cf92924Younes Manton
305e44c85637a3298918e292e9ddba812856cf92924Younes Manton   r = r;
306e44c85637a3298918e292e9ddba812856cf92924Younes Manton   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
307e44c85637a3298918e292e9ddba812856cf92924Younes Manton   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
308e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton   a = lp_build_const_int_vec(gallivm, type, 0xff000000);
309e44c85637a3298918e292e9ddba812856cf92924Younes Manton
310e44c85637a3298918e292e9ddba812856cf92924Younes Manton   rgba = r;
3118580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   rgba = LLVMBuildOr(builder, rgba, g, "");
3128580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   rgba = LLVMBuildOr(builder, rgba, b, "");
313e44c85637a3298918e292e9ddba812856cf92924Younes Manton   rgba = LLVMBuildOr(builder, rgba, a, "");
314c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton
315c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton   rgba = LLVMBuildBitCast(builder, rgba,
3163107b54b011c7ceef2b314632bdcf0b87c5e4d36Younes Manton                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
317e44c85637a3298918e292e9ddba812856cf92924Younes Manton
318c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton   return rgba;
319e44c85637a3298918e292e9ddba812856cf92924Younes Manton}
3206858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton
3218580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
3228580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton/**
323e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
324c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton */
325e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic LLVMValueRef
326e44c85637a3298918e292e9ddba812856cf92924Younes Mantonuyvy_to_rgba_aos(struct gallivm_state *gallivm,
32790bd0e338d315c426c2d0255331610055023739eYounes Manton                 unsigned n,
328e44c85637a3298918e292e9ddba812856cf92924Younes Manton                 LLVMValueRef packed,
329d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König                 LLVMValueRef i)
33032c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König{
331bd5fd67a3e3cda4b7676dd4745fc5d5524709210Christian König   LLVMValueRef y, u, v;
3321448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   LLVMValueRef r, g, b;
3331448e829e86981e6144410ba6a3d0f16357fb2b3Christian König   LLVMValueRef rgba;
334e44c85637a3298918e292e9ddba812856cf92924Younes Manton
335e44c85637a3298918e292e9ddba812856cf92924Younes Manton   uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
33690bd0e338d315c426c2d0255331610055023739eYounes Manton   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
3378580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
3388580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton
339e44c85637a3298918e292e9ddba812856cf92924Younes Manton   return rgba;
34090bd0e338d315c426c2d0255331610055023739eYounes Manton}
341
342
343/**
344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
345 */
346static LLVMValueRef
347yuyv_to_rgba_aos(struct gallivm_state *gallivm,
348                 unsigned n,
349                 LLVMValueRef packed,
350                 LLVMValueRef i)
351{
352   LLVMValueRef y, u, v;
353   LLVMValueRef r, g, b;
354   LLVMValueRef rgba;
355
356   yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
357   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
358   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
359
360   return rgba;
361}
362
363
364/**
365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
366 */
367static LLVMValueRef
368rgbg_to_rgba_aos(struct gallivm_state *gallivm,
369                 unsigned n,
370                 LLVMValueRef packed,
371                 LLVMValueRef i)
372{
373   LLVMValueRef r, g, b;
374   LLVMValueRef rgba;
375
376   uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
377   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
378
379   return rgba;
380}
381
382
383/**
384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
385 */
386static LLVMValueRef
387grgb_to_rgba_aos(struct gallivm_state *gallivm,
388                 unsigned n,
389                 LLVMValueRef packed,
390                 LLVMValueRef i)
391{
392   LLVMValueRef r, g, b;
393   LLVMValueRef rgba;
394
395   yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
396   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397
398   return rgba;
399}
400
401/**
402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
403 */
404static LLVMValueRef
405grbr_to_rgba_aos(struct gallivm_state *gallivm,
406                 unsigned n,
407                 LLVMValueRef packed,
408                 LLVMValueRef i)
409{
410   LLVMValueRef r, g, b;
411   LLVMValueRef rgba;
412
413   uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
414   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
415
416   return rgba;
417}
418
419
420/**
421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
422 */
423static LLVMValueRef
424rgrb_to_rgba_aos(struct gallivm_state *gallivm,
425                 unsigned n,
426                 LLVMValueRef packed,
427                 LLVMValueRef i)
428{
429   LLVMValueRef r, g, b;
430   LLVMValueRef rgba;
431
432   yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
433   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
434
435   return rgba;
436}
437
438/**
439 * @param n  is the number of pixels processed
440 * @param packed  is a <n x i32> vector with the packed YUYV blocks
441 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
442 * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
443 */
444LLVMValueRef
445lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
446                                   const struct util_format_description *format_desc,
447                                   unsigned n,
448                                   LLVMValueRef base_ptr,
449                                   LLVMValueRef offset,
450                                   LLVMValueRef i,
451                                   LLVMValueRef j)
452{
453   LLVMValueRef packed;
454   LLVMValueRef rgba;
455
456   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
457   assert(format_desc->block.bits == 32);
458   assert(format_desc->block.width == 2);
459   assert(format_desc->block.height == 1);
460
461   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
462
463   (void)j;
464
465   switch (format_desc->format) {
466   case PIPE_FORMAT_UYVY:
467      rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
468      break;
469   case PIPE_FORMAT_YUYV:
470      rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
471      break;
472   case PIPE_FORMAT_R8G8_B8G8_UNORM:
473      rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
474      break;
475   case PIPE_FORMAT_G8R8_G8B8_UNORM:
476      rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
477      break;
478   case PIPE_FORMAT_G8R8_B8R8_UNORM:
479      rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
480      break;
481   case PIPE_FORMAT_R8G8_R8B8_UNORM:
482      rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
483      break;
484   default:
485      assert(0);
486      rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
487      break;
488   }
489
490   return rgba;
491}
492
493