lp_bld_format_yuv.c revision 3469715a8a171512cf9b528702e70393f01c6041
/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
27bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
28bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
/**
 * @file
 * YUV pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
35bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
36bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
37bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "util/u_format.h"
38bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "util/u_cpu_detect.h"
39bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
40bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_arit.h"
41bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_type.h"
42bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_const.h"
43bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_conv.h"
44bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_gather.h"
45bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_format.h"
46bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_init.h"
47bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_logic.h"
48bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
49bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/**
50bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Extract Y, U, V channels from packed UYVY.
51bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param packed  is a <n x i32> vector with the packed UYVY blocks
52bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
53bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */
54bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void
55bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleauyvy_to_yuv_soa(struct gallivm_state *gallivm,
56bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                unsigned n,
57bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef packed,
58bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef i,
59bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *y,
60bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *u,
61bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *v)
62bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{
63bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMBuilderRef builder = gallivm->builder;
64bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   struct lp_type type;
65bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef mask;
66bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
67bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   memset(&type, 0, sizeof type);
68bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   type.width = 32;
69bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   type.length = n;
70bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
71bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   assert(lp_check_value(type, packed));
72bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   assert(lp_check_value(type, i));
73bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
74bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   /*
75bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * y = (uyvy >> (16*i + 8)) & 0xff
76bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * u = (uyvy        ) & 0xff
77bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * v = (uyvy >> 16  ) & 0xff
78bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    */
79bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
80bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
81bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   /*
82bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * Avoid shift with per-element count.
83bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * No support on x86, gets translated to roughly 5 instructions
84bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * per element. Didn't measure performance but cuts shader size
85bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * by quite a bit (less difference if cpu has no sse4.1 support).
86bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    */
87bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   if (util_cpu_caps.has_sse2 && n > 1) {
88bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      LLVMValueRef sel, tmp, tmp2;
89bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      struct lp_build_context bld32;
90bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
91bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      lp_build_context_init(&bld32, gallivm, type);
92bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
93bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
94bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
95bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
96bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      *y = lp_build_select(&bld32, sel, tmp, tmp2);
97bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   } else
98bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif
99bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   {
100bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      LLVMValueRef shift;
101bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
102bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
103bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      *y = LLVMBuildLShr(builder, packed, shift, "");
104bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   }
105bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
106bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *u = packed;
107bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
108bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
109bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   mask = lp_build_const_int_vec(gallivm, type, 0xff);
110bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
111bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *y = LLVMBuildAnd(builder, *y, mask, "y");
112bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *u = LLVMBuildAnd(builder, *u, mask, "u");
113bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *v = LLVMBuildAnd(builder, *v, mask, "v");
114bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}
115bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
116bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
117bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/**
118bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Extract Y, U, V channels from packed YUYV.
119bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param packed  is a <n x i32> vector with the packed YUYV blocks
120bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
121bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */
122bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void
123bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleayuyv_to_yuv_soa(struct gallivm_state *gallivm,
124bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                unsigned n,
125bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef packed,
126bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef i,
127bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *y,
128bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *u,
129bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea                LLVMValueRef *v)
130bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{
131bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMBuilderRef builder = gallivm->builder;
132bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   struct lp_type type;
133bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef mask;
134bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
135bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   memset(&type, 0, sizeof type);
136bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   type.width = 32;
137bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   type.length = n;
138bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
139bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   assert(lp_check_value(type, packed));
140bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   assert(lp_check_value(type, i));
141bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
142bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   /*
143bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * y = (yuyv >> 16*i) & 0xff
144bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * u = (yuyv >> 8   ) & 0xff
145bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * v = (yuyv >> 24  ) & 0xff
146bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    */
147bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
148bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
149bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   /*
150bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * Avoid shift with per-element count.
151bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * No support on x86, gets translated to roughly 5 instructions
152bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * per element. Didn't measure performance but cuts shader size
153bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    * by quite a bit (less difference if cpu has no sse4.1 support).
154bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea    */
155bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   if (util_cpu_caps.has_sse2 && n > 1) {
156bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      LLVMValueRef sel, tmp;
157bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      struct lp_build_context bld32;
158bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
159bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      lp_build_context_init(&bld32, gallivm, type);
160bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
161bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
162bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
163bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea       *y = lp_build_select(&bld32, sel, packed, tmp);
164bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   } else
165bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif
166bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   {
167bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      LLVMValueRef shift;
168bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
169bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea      *y = LLVMBuildLShr(builder, packed, shift, "");
170bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   }
171bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
172bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
173bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
174bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
175bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   mask = lp_build_const_int_vec(gallivm, type, 0xff);
176bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
177bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *y = LLVMBuildAnd(builder, *y, mask, "y");
178bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *u = LLVMBuildAnd(builder, *u, mask, "u");
179bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   *v = LLVMBuildAnd(builder, *v, mask, "v");
180bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea}
181bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
182bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
183bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic INLINE void
184bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleayuv_to_rgb_soa(struct gallivm_state *gallivm,
185bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea               unsigned n,
186bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea               LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
187bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea               LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
188bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{
189bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMBuilderRef builder = gallivm->builder;
190bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   struct lp_type type;
191bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   struct lp_build_context bld;
192bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea
193bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef c0;
194bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef c8;
195bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef c16;
196bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef c128;
197bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea   LLVMValueRef c255;
198
199   LLVMValueRef cy;
200   LLVMValueRef cug;
201   LLVMValueRef cub;
202   LLVMValueRef cvr;
203   LLVMValueRef cvg;
204
205   memset(&type, 0, sizeof type);
206   type.sign = TRUE;
207   type.width = 32;
208   type.length = n;
209
210   lp_build_context_init(&bld, gallivm, type);
211
212   assert(lp_check_value(type, y));
213   assert(lp_check_value(type, u));
214   assert(lp_check_value(type, v));
215
216   /*
217    * Constants
218    */
219
220   c0   = lp_build_const_int_vec(gallivm, type,   0);
221   c8   = lp_build_const_int_vec(gallivm, type,   8);
222   c16  = lp_build_const_int_vec(gallivm, type,  16);
223   c128 = lp_build_const_int_vec(gallivm, type, 128);
224   c255 = lp_build_const_int_vec(gallivm, type, 255);
225
226   cy  = lp_build_const_int_vec(gallivm, type,  298);
227   cug = lp_build_const_int_vec(gallivm, type, -100);
228   cub = lp_build_const_int_vec(gallivm, type,  516);
229   cvr = lp_build_const_int_vec(gallivm, type,  409);
230   cvg = lp_build_const_int_vec(gallivm, type, -208);
231
232   /*
233    *  y -= 16;
234    *  u -= 128;
235    *  v -= 128;
236    */
237
238   y = LLVMBuildSub(builder, y, c16, "");
239   u = LLVMBuildSub(builder, u, c128, "");
240   v = LLVMBuildSub(builder, v, c128, "");
241
242   /*
243    * r = 298 * _y            + 409 * _v + 128;
244    * g = 298 * _y - 100 * _u - 208 * _v + 128;
245    * b = 298 * _y + 516 * _u            + 128;
246    */
247
248   y = LLVMBuildMul(builder, y, cy, "");
249   y = LLVMBuildAdd(builder, y, c128, "");
250
251   *r = LLVMBuildMul(builder, v, cvr, "");
252   *g = LLVMBuildAdd(builder,
253                     LLVMBuildMul(builder, u, cug, ""),
254                     LLVMBuildMul(builder, v, cvg, ""),
255                     "");
256   *b = LLVMBuildMul(builder, u, cub, "");
257
258   *r = LLVMBuildAdd(builder, *r, y, "");
259   *g = LLVMBuildAdd(builder, *g, y, "");
260   *b = LLVMBuildAdd(builder, *b, y, "");
261
262   /*
263    * r >>= 8;
264    * g >>= 8;
265    * b >>= 8;
266    */
267
268   *r = LLVMBuildAShr(builder, *r, c8, "r");
269   *g = LLVMBuildAShr(builder, *g, c8, "g");
270   *b = LLVMBuildAShr(builder, *b, c8, "b");
271
272   /*
273    * Clamp
274    */
275
276   *r = lp_build_clamp(&bld, *r, c0, c255);
277   *g = lp_build_clamp(&bld, *g, c0, c255);
278   *b = lp_build_clamp(&bld, *b, c0, c255);
279}
280
281
282static LLVMValueRef
283rgb_to_rgba_aos(struct gallivm_state *gallivm,
284                unsigned n,
285                LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
286{
287   LLVMBuilderRef builder = gallivm->builder;
288   struct lp_type type;
289   LLVMValueRef a;
290   LLVMValueRef rgba;
291
292   memset(&type, 0, sizeof type);
293   type.sign = TRUE;
294   type.width = 32;
295   type.length = n;
296
297   assert(lp_check_value(type, r));
298   assert(lp_check_value(type, g));
299   assert(lp_check_value(type, b));
300
301   /*
302    * Make a 4 x unorm8 vector
303    */
304
305   r = r;
306   g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
307   b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
308   a = lp_build_const_int_vec(gallivm, type, 0xff000000);
309
310   rgba = r;
311   rgba = LLVMBuildOr(builder, rgba, g, "");
312   rgba = LLVMBuildOr(builder, rgba, b, "");
313   rgba = LLVMBuildOr(builder, rgba, a, "");
314
315   rgba = LLVMBuildBitCast(builder, rgba,
316                           LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
317
318   return rgba;
319}
320
321
322/**
323 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
324 */
325static LLVMValueRef
326uyvy_to_rgba_aos(struct gallivm_state *gallivm,
327                 unsigned n,
328                 LLVMValueRef packed,
329                 LLVMValueRef i)
330{
331   LLVMValueRef y, u, v;
332   LLVMValueRef r, g, b;
333   LLVMValueRef rgba;
334
335   uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
336   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
337   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
338
339   return rgba;
340}
341
342
343/**
344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
345 */
346static LLVMValueRef
347yuyv_to_rgba_aos(struct gallivm_state *gallivm,
348                 unsigned n,
349                 LLVMValueRef packed,
350                 LLVMValueRef i)
351{
352   LLVMValueRef y, u, v;
353   LLVMValueRef r, g, b;
354   LLVMValueRef rgba;
355
356   yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
357   yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
358   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
359
360   return rgba;
361}
362
363
364/**
365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
366 */
367static LLVMValueRef
368rgbg_to_rgba_aos(struct gallivm_state *gallivm,
369                 unsigned n,
370                 LLVMValueRef packed,
371                 LLVMValueRef i)
372{
373   LLVMValueRef r, g, b;
374   LLVMValueRef rgba;
375
376   uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
377   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
378
379   return rgba;
380}
381
382
383/**
384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
385 */
386static LLVMValueRef
387grgb_to_rgba_aos(struct gallivm_state *gallivm,
388                 unsigned n,
389                 LLVMValueRef packed,
390                 LLVMValueRef i)
391{
392   LLVMValueRef r, g, b;
393   LLVMValueRef rgba;
394
395   yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
396   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397
398   return rgba;
399}
400
401/**
402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
403 */
404static LLVMValueRef
405grbr_to_rgba_aos(struct gallivm_state *gallivm,
406                 unsigned n,
407                 LLVMValueRef packed,
408                 LLVMValueRef i)
409{
410   LLVMValueRef r, g, b;
411   LLVMValueRef rgba;
412
413   uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
414   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
415
416   return rgba;
417}
418
419
420/**
421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
422 */
423static LLVMValueRef
424rgrb_to_rgba_aos(struct gallivm_state *gallivm,
425                 unsigned n,
426                 LLVMValueRef packed,
427                 LLVMValueRef i)
428{
429   LLVMValueRef r, g, b;
430   LLVMValueRef rgba;
431
432   yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
433   rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
434
435   return rgba;
436}
437
438/**
439 * @param n  is the number of pixels processed
440 * @param packed  is a <n x i32> vector with the packed YUYV blocks
441 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
442 * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
443 */
444LLVMValueRef
445lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
446                                   const struct util_format_description *format_desc,
447                                   unsigned n,
448                                   LLVMValueRef base_ptr,
449                                   LLVMValueRef offset,
450                                   LLVMValueRef i,
451                                   LLVMValueRef j)
452{
453   LLVMValueRef packed;
454   LLVMValueRef rgba;
455
456   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
457   assert(format_desc->block.bits == 32);
458   assert(format_desc->block.width == 2);
459   assert(format_desc->block.height == 1);
460
461   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
462
463   (void)j;
464
465   switch (format_desc->format) {
466   case PIPE_FORMAT_UYVY:
467      rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
468      break;
469   case PIPE_FORMAT_YUYV:
470      rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
471      break;
472   case PIPE_FORMAT_R8G8_B8G8_UNORM:
473      rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
474      break;
475   case PIPE_FORMAT_G8R8_G8B8_UNORM:
476      rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
477      break;
478   case PIPE_FORMAT_G8R8_B8R8_UNORM:
479      rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
480      break;
481   case PIPE_FORMAT_R8G8_R8B8_UNORM:
482      rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
483      break;
484   default:
485      assert(0);
486      rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
487      break;
488   }
489
490   return rgba;
491}
492
493