lp_bld_format_yuv.c revision 3469715a8a171512cf9b528702e70393f01c6041
1bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/************************************************************************** 2bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 3bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Copyright 2010 VMware, Inc. 4bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * All Rights Reserved. 5bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 6bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Permission is hereby granted, free of charge, to any person obtaining a 7bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * copy of this software and associated documentation files (the 8bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * "Software"), to deal in the Software without restriction, including 9bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * without limitation the rights to use, copy, modify, merge, publish, 10bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * distribute, sub license, and/or sell copies of the Software, and to 11bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * permit persons to whom the Software is furnished to do so, subject to 12bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * the following conditions: 13bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 14bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * USE OR OTHER DEALINGS IN THE SOFTWARE. 21bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 22bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * The above copyright notice and this permission notice (including the 23bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * next paragraph) shall be included in all copies or substantial portions 24bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * of the Software. 25bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 26bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea **************************************************************************/ 27bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 28bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 29bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/** 30bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @file 31bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * YUV pixel format manipulation. 32bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * 33bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @author Jose Fonseca <jfonseca@vmware.com> 34bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 35bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 36bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 37bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "util/u_format.h" 38bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "util/u_cpu_detect.h" 39bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 40bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_arit.h" 41bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_type.h" 42bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_const.h" 43bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_conv.h" 44bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_gather.h" 45bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_format.h" 46bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_init.h" 47bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#include "lp_bld_logic.h" 48bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 49bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/** 50bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Extract Y, U, V channels from packed UYVY. 51bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param packed is a <n x i32> vector with the packed UYVY blocks 52bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 53bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 54bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 55bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleauyvy_to_yuv_soa(struct gallivm_state *gallivm, 56bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned n, 57bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef packed, 58bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef i, 59bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *y, 60bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *u, 61bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *v) 62bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 63bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMBuilderRef builder = gallivm->builder; 64bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_type type; 65bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef mask; 66bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 67bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea memset(&type, 0, sizeof type); 68bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea type.width = 32; 69bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea type.length = n; 70bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 71bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea assert(lp_check_value(type, packed)); 72bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea assert(lp_check_value(type, i)); 73bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 74bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 75bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * y = (uyvy >> (16*i + 8)) & 0xff 76bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * u = (uyvy ) & 0xff 77bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * v = (uyvy >> 16 ) & 0xff 78bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 79bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 80bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 81bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 82bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Avoid shift with per-element count. 83bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * No support on x86, gets translated to roughly 5 instructions 84bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * per element. Didn't measure performance but cuts shader size 85bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * by quite a bit (less difference if cpu has no sse4.1 support). 86bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 87bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (util_cpu_caps.has_sse2 && n > 1) { 88bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef sel, tmp, tmp2; 89bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_build_context bld32; 90bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 91bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea lp_build_context_init(&bld32, gallivm, type); 92bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 93bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 94bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); 95bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 96bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = lp_build_select(&bld32, sel, tmp, tmp2); 97bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } else 98bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif 99bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 100bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef shift; 101bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 102bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); 103bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = LLVMBuildLShr(builder, packed, shift, ""); 104bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 105bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 106bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *u = packed; 107bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 108bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 109bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea mask = lp_build_const_int_vec(gallivm, type, 0xff); 110bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 111bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = LLVMBuildAnd(builder, *y, mask, "y"); 112bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *u = LLVMBuildAnd(builder, *u, mask, "u"); 113bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *v = LLVMBuildAnd(builder, *v, mask, "v"); 114bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 115bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 116bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 117bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea/** 118bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Extract Y, U, V channels from packed YUYV. 119bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param packed is a <n x i32> vector with the packed YUYV blocks 120bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 121bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 122bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic void 123bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleayuyv_to_yuv_soa(struct gallivm_state *gallivm, 124bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned n, 125bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef packed, 126bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef i, 127bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *y, 128bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *u, 129bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *v) 130bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 131bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMBuilderRef builder = gallivm->builder; 132bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_type type; 133bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef mask; 134bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 135bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea memset(&type, 0, sizeof type); 136bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea type.width = 32; 137bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea type.length = n; 138bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 139bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea assert(lp_check_value(type, packed)); 140bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea assert(lp_check_value(type, i)); 141bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 142bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 143bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * y = (yuyv >> 16*i) & 0xff 144bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * u = (yuyv >> 8 ) & 0xff 145bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * v = (yuyv >> 24 ) & 0xff 146bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 147bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 148bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 149bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea /* 150bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * Avoid shift with per-element count. 151bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * No support on x86, gets translated to roughly 5 instructions 152bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * per element. Didn't measure performance but cuts shader size 153bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea * by quite a bit (less difference if cpu has no sse4.1 support). 154bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea */ 155bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea if (util_cpu_caps.has_sse2 && n > 1) { 156bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef sel, tmp; 157bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_build_context bld32; 158bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 159bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea lp_build_context_init(&bld32, gallivm, type); 160bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 161bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 162bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 163bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = lp_build_select(&bld32, sel, packed, tmp); 164bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } else 165bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea#endif 166bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea { 167bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef shift; 168bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 169bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = LLVMBuildLShr(builder, packed, shift, ""); 170bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea } 171bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 172bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 173bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); 174bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 175bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea mask = lp_build_const_int_vec(gallivm, type, 0xff); 176bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 177bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *y = LLVMBuildAnd(builder, *y, mask, "y"); 178bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *u = LLVMBuildAnd(builder, *u, mask, "u"); 179bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea *v = LLVMBuildAnd(builder, *v, mask, "v"); 180bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea} 181bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 182bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 183bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleastatic INLINE void 184bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Maleayuv_to_rgb_soa(struct gallivm_state *gallivm, 185bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea unsigned n, 186bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, 187bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) 188bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea{ 189bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMBuilderRef builder = gallivm->builder; 190bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_type type; 191bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea struct lp_build_context bld; 192bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea 193bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef c0; 194bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef c8; 195bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef c16; 196bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef c128; 197bdcd07921b76d3df2cc7e6563718dde79876b0adDaniel Malea LLVMValueRef c255; 198 199 LLVMValueRef cy; 200 LLVMValueRef cug; 201 LLVMValueRef cub; 202 LLVMValueRef cvr; 203 LLVMValueRef cvg; 204 205 memset(&type, 0, sizeof type); 206 type.sign = TRUE; 207 type.width = 32; 208 type.length = n; 209 210 lp_build_context_init(&bld, gallivm, type); 211 212 assert(lp_check_value(type, y)); 213 assert(lp_check_value(type, u)); 214 assert(lp_check_value(type, v)); 215 216 /* 217 * Constants 218 */ 219 220 c0 = lp_build_const_int_vec(gallivm, type, 0); 221 c8 = lp_build_const_int_vec(gallivm, type, 8); 222 c16 = lp_build_const_int_vec(gallivm, type, 16); 223 c128 = lp_build_const_int_vec(gallivm, type, 128); 224 c255 = lp_build_const_int_vec(gallivm, type, 255); 225 226 cy = lp_build_const_int_vec(gallivm, type, 298); 227 cug = lp_build_const_int_vec(gallivm, type, -100); 228 cub = lp_build_const_int_vec(gallivm, type, 516); 229 cvr = lp_build_const_int_vec(gallivm, type, 409); 230 cvg = lp_build_const_int_vec(gallivm, type, -208); 231 232 /* 233 * y -= 16; 234 * u -= 128; 235 * v -= 128; 236 */ 237 238 y = LLVMBuildSub(builder, y, c16, ""); 239 u = LLVMBuildSub(builder, u, c128, ""); 240 v = LLVMBuildSub(builder, v, c128, ""); 241 242 /* 243 * r = 298 * _y + 409 * _v + 128; 244 * g = 298 * _y - 100 * _u - 208 * _v + 128; 245 * b = 298 * _y + 516 * _u + 128; 246 */ 247 248 y = LLVMBuildMul(builder, y, cy, ""); 249 y = LLVMBuildAdd(builder, y, c128, ""); 250 251 *r = LLVMBuildMul(builder, v, cvr, ""); 252 *g = LLVMBuildAdd(builder, 253 LLVMBuildMul(builder, u, cug, ""), 254 LLVMBuildMul(builder, v, cvg, ""), 255 ""); 256 *b = LLVMBuildMul(builder, u, cub, ""); 257 258 *r = LLVMBuildAdd(builder, *r, y, ""); 259 *g = LLVMBuildAdd(builder, *g, y, ""); 260 *b = LLVMBuildAdd(builder, *b, y, ""); 261 262 /* 263 * r >>= 8; 264 * g >>= 8; 265 * b >>= 8; 266 */ 267 268 *r = LLVMBuildAShr(builder, *r, c8, "r"); 269 *g = LLVMBuildAShr(builder, *g, c8, "g"); 270 *b = LLVMBuildAShr(builder, *b, c8, "b"); 271 272 /* 273 * Clamp 274 */ 275 276 *r = lp_build_clamp(&bld, *r, c0, c255); 277 *g = lp_build_clamp(&bld, *g, c0, c255); 278 *b = lp_build_clamp(&bld, *b, c0, c255); 279} 280 281 282static LLVMValueRef 283rgb_to_rgba_aos(struct gallivm_state *gallivm, 284 unsigned n, 285 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) 286{ 287 LLVMBuilderRef builder = gallivm->builder; 288 struct lp_type type; 289 LLVMValueRef a; 290 LLVMValueRef rgba; 291 292 memset(&type, 0, sizeof type); 293 type.sign = TRUE; 294 type.width = 32; 295 type.length = n; 296 297 assert(lp_check_value(type, r)); 298 assert(lp_check_value(type, g)); 299 assert(lp_check_value(type, b)); 300 301 /* 302 * Make a 4 x unorm8 vector 303 */ 304 305 r = r; 306 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); 307 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); 308 a = lp_build_const_int_vec(gallivm, type, 0xff000000); 309 310 rgba = r; 311 rgba = LLVMBuildOr(builder, rgba, g, ""); 312 rgba = LLVMBuildOr(builder, rgba, b, ""); 313 rgba = LLVMBuildOr(builder, rgba, a, ""); 314 315 rgba = LLVMBuildBitCast(builder, rgba, 316 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); 317 318 return rgba; 319} 320 321 322/** 323 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS 324 */ 325static LLVMValueRef 326uyvy_to_rgba_aos(struct gallivm_state *gallivm, 327 unsigned n, 328 LLVMValueRef packed, 329 LLVMValueRef i) 330{ 331 LLVMValueRef y, u, v; 332 LLVMValueRef r, g, b; 333 LLVMValueRef rgba; 334 335 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 336 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 337 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 338 339 return rgba; 340} 341 342 343/** 344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS 345 */ 346static LLVMValueRef 347yuyv_to_rgba_aos(struct gallivm_state *gallivm, 348 unsigned n, 349 LLVMValueRef packed, 350 LLVMValueRef i) 351{ 352 LLVMValueRef y, u, v; 353 LLVMValueRef r, g, b; 354 LLVMValueRef rgba; 355 356 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 357 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 358 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 359 360 return rgba; 361} 362 363 364/** 365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS 366 */ 367static LLVMValueRef 368rgbg_to_rgba_aos(struct gallivm_state *gallivm, 369 unsigned n, 370 LLVMValueRef packed, 371 LLVMValueRef i) 372{ 373 LLVMValueRef r, g, b; 374 LLVMValueRef rgba; 375 376 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 377 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 378 379 return rgba; 380} 381 382 383/** 384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS 385 */ 386static LLVMValueRef 387grgb_to_rgba_aos(struct gallivm_state *gallivm, 388 unsigned n, 389 LLVMValueRef packed, 390 LLVMValueRef i) 391{ 392 LLVMValueRef r, g, b; 393 LLVMValueRef rgba; 394 395 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 396 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 397 398 return rgba; 399} 400 401/** 402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS 403 */ 404static LLVMValueRef 405grbr_to_rgba_aos(struct gallivm_state *gallivm, 406 unsigned n, 407 LLVMValueRef packed, 408 LLVMValueRef i) 409{ 410 LLVMValueRef r, g, b; 411 LLVMValueRef rgba; 412 413 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 414 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 415 416 return rgba; 417} 418 419 420/** 421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS 422 */ 423static LLVMValueRef 424rgrb_to_rgba_aos(struct gallivm_state *gallivm, 425 unsigned n, 426 LLVMValueRef packed, 427 LLVMValueRef i) 428{ 429 LLVMValueRef r, g, b; 430 LLVMValueRef rgba; 431 432 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 433 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 434 435 return rgba; 436} 437 438/** 439 * @param n is the number of pixels processed 440 * @param packed is a <n x i32> vector with the packed YUYV blocks 441 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 442 * @return a <4*n x i8> vector with the pixel RGBA values in AoS 443 */ 444LLVMValueRef 445lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, 446 const struct util_format_description *format_desc, 447 unsigned n, 448 LLVMValueRef base_ptr, 449 LLVMValueRef offset, 450 LLVMValueRef i, 451 LLVMValueRef j) 452{ 453 LLVMValueRef packed; 454 LLVMValueRef rgba; 455 456 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); 457 assert(format_desc->block.bits == 32); 458 assert(format_desc->block.width == 2); 459 assert(format_desc->block.height == 1); 460 461 packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset); 462 463 (void)j; 464 465 switch (format_desc->format) { 466 case PIPE_FORMAT_UYVY: 467 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); 468 break; 469 case PIPE_FORMAT_YUYV: 470 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); 471 break; 472 case PIPE_FORMAT_R8G8_B8G8_UNORM: 473 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); 474 break; 475 case PIPE_FORMAT_G8R8_G8B8_UNORM: 476 rgba = grgb_to_rgba_aos(gallivm, n, packed, i); 477 break; 478 case PIPE_FORMAT_G8R8_B8R8_UNORM: 479 rgba = grbr_to_rgba_aos(gallivm, n, packed, i); 480 break; 481 case PIPE_FORMAT_R8G8_R8B8_UNORM: 482 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i); 483 break; 484 default: 485 assert(0); 486 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); 487 break; 488 } 489 490 return rgba; 491} 492 493