/* lp_bld_format_yuv.c revision 461c34c0cb0e23f11fd9390a37a62d9930a1c48e */
1fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton/************************************************************************** 2e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * 3fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * Copyright 2010 VMware, Inc. 4fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * All Rights Reserved. 5e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * 6fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * Permission is hereby granted, free of charge, to any person obtaining a 7fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * copy of this software and associated documentation files (the 8fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * "Software"), to deal in the Software without restriction, including 9fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * without limitation the rights to use, copy, modify, merge, publish, 10fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * distribute, sub license, and/or sell copies of the Software, and to 11fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * permit persons to whom the Software is furnished to do so, subject to 12fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * the following conditions: 13e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * 14fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL 17e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * USE OR OTHER DEALINGS IN THE SOFTWARE. 21fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * 22fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * The above copyright notice and this permission notice (including the 23fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * next paragraph) shall be included in all copies or substantial portions 24fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton * of the Software. 25e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * 26fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton **************************************************************************/ 27fcb595c04f9ee275eae49b7bb7c61246671f5ce2Younes Manton 28c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton 29b48676672592271597d07e5ece79cf4d3ffbe04bChristian König/** 302ef8c60e558938686196bf8ff4d22fd57903bf4cCooper Yuan * @file 31e44c85637a3298918e292e9ddba812856cf92924Younes Manton * YUV pixel format manipulation. 
32b48676672592271597d07e5ece79cf4d3ffbe04bChristian König * 337ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch * @author Jose Fonseca <jfonseca@vmware.com> 347ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch */ 357ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch 367ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch 37b48676672592271597d07e5ece79cf4d3ffbe04bChristian König#include "util/u_format.h" 387ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch#include "util/u_cpu_detect.h" 39b48676672592271597d07e5ece79cf4d3ffbe04bChristian König 407ea550621e25f9b2f344b2ed60551ceec91b3fcfKai Wasserbäch#include "lp_bld_arit.h" 41fc0a5e21d77ae2f082fd19dd2295e84f6fb7bd3bChristian König#include "lp_bld_type.h" 42b48676672592271597d07e5ece79cf4d3ffbe04bChristian König#include "lp_bld_const.h" 43e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_conv.h" 44c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton#include "lp_bld_gather.h" 45e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_format.h" 46e44c85637a3298918e292e9ddba812856cf92924Younes Manton#include "lp_bld_init.h" 4770c44073ad3f333ed40c5c297a934a359c839e94Younes Manton#include "lp_bld_logic.h" 485eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton 495eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton/** 505eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton * Extract Y, U, V channels from packed UYVY. 
51c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton * @param packed is a <n x i32> vector with the packed UYVY blocks 52e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 53e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 54e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic void 55e44c85637a3298918e292e9ddba812856cf92924Younes Mantonuyvy_to_yuv_soa(struct gallivm_state *gallivm, 568b02f9e67b83e40019d6b07b9a035ba5d5042688Christian König unsigned n, 57e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef packed, 58e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef i, 59e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef *y, 60e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef *u, 61e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef *v) 62e44c85637a3298918e292e9ddba812856cf92924Younes Manton{ 63e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMBuilderRef builder = gallivm->builder; 6470c44073ad3f333ed40c5c297a934a359c839e94Younes Manton struct lp_type type; 655eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton LLVMValueRef mask; 665eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton 67e44c85637a3298918e292e9ddba812856cf92924Younes Manton memset(&type, 0, sizeof type); 68e44c85637a3298918e292e9ddba812856cf92924Younes Manton type.width = 32; 69e44c85637a3298918e292e9ddba812856cf92924Younes Manton type.length = n; 70d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton 71e44c85637a3298918e292e9ddba812856cf92924Younes Manton assert(lp_check_value(type, packed)); 72e44c85637a3298918e292e9ddba812856cf92924Younes Manton assert(lp_check_value(type, i)); 73e44c85637a3298918e292e9ddba812856cf92924Younes Manton 74e44c85637a3298918e292e9ddba812856cf92924Younes Manton /* 75d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton * y = (uyvy >> (16*i + 8)) & 0xff 
76d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton * u = (uyvy ) & 0xff 77e44c85637a3298918e292e9ddba812856cf92924Younes Manton * v = (uyvy >> 16 ) & 0xff 78e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 79e44c85637a3298918e292e9ddba812856cf92924Younes Manton 80e44c85637a3298918e292e9ddba812856cf92924Younes Manton#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 81e44c85637a3298918e292e9ddba812856cf92924Younes Manton /* 82e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Avoid shift with per-element count. 83e44c85637a3298918e292e9ddba812856cf92924Younes Manton * No support on x86, gets translated to roughly 5 instructions 84e44c85637a3298918e292e9ddba812856cf92924Younes Manton * per element. Didn't measure performance but cuts shader size 85d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton * by quite a bit (less difference if cpu has no sse4.1 support). 86e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 87e44c85637a3298918e292e9ddba812856cf92924Younes Manton if (util_cpu_caps.has_sse2 && n == 4) { 88e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef sel, tmp, tmp2; 89e44c85637a3298918e292e9ddba812856cf92924Younes Manton struct lp_build_context bld32; 90d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton 91e44c85637a3298918e292e9ddba812856cf92924Younes Manton lp_build_context_init(&bld32, gallivm, type); 92e44c85637a3298918e292e9ddba812856cf92924Younes Manton 93e44c85637a3298918e292e9ddba812856cf92924Younes Manton tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 94e44c85637a3298918e292e9ddba812856cf92924Younes Manton tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); 95e44c85637a3298918e292e9ddba812856cf92924Younes Manton sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 96e44c85637a3298918e292e9ddba812856cf92924Younes Manton *y = lp_build_select(&bld32, sel, tmp, tmp2); 
97e44c85637a3298918e292e9ddba812856cf92924Younes Manton } else 98e44c85637a3298918e292e9ddba812856cf92924Younes Manton#endif 9970c44073ad3f333ed40c5c297a934a359c839e94Younes Manton { 1005eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton LLVMValueRef shift; 1015eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 102e44c85637a3298918e292e9ddba812856cf92924Younes Manton shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); 1038580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton *y = LLVMBuildLShr(builder, packed, shift, ""); 1045eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton } 1058580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 1065eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton *u = packed; 1078580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 1088580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 1098580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton mask = lp_build_const_int_vec(gallivm, type, 0xff); 1105eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton 1115eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton *y = LLVMBuildAnd(builder, *y, mask, "y"); 1125eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton *u = LLVMBuildAnd(builder, *u, mask, "u"); 1135eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton *v = LLVMBuildAnd(builder, *v, mask, "v"); 114e44c85637a3298918e292e9ddba812856cf92924Younes Manton} 115e44c85637a3298918e292e9ddba812856cf92924Younes Manton 116d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton 117e44c85637a3298918e292e9ddba812856cf92924Younes Manton/** 118e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Extract Y, U, V channels from packed YUYV. 
119e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param packed is a <n x i32> vector with the packed YUYV blocks 120e44c85637a3298918e292e9ddba812856cf92924Younes Manton * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 121e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 122e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic void 1238580b7a0eeed3fc29320b2c0a184084e4267661aYounes Mantonyuyv_to_yuv_soa(struct gallivm_state *gallivm, 1248580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton unsigned n, 125e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef packed, 1268580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMValueRef i, 1278580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMValueRef *y, 1288580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMValueRef *u, 129e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef *v) 1308580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton{ 1318580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMBuilderRef builder = gallivm->builder; 1328580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton struct lp_type type; 1338580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMValueRef mask; 134e44c85637a3298918e292e9ddba812856cf92924Younes Manton 1358580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton memset(&type, 0, sizeof type); 1368580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton type.width = 32; 1378580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton type.length = n; 138e44c85637a3298918e292e9ddba812856cf92924Younes Manton 1398580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton assert(lp_check_value(type, packed)); 140e44c85637a3298918e292e9ddba812856cf92924Younes Manton assert(lp_check_value(type, i)); 141e44c85637a3298918e292e9ddba812856cf92924Younes Manton 142c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton /* 143c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton * y = (yuyv >> 16*i) & 0xff 
144e44c85637a3298918e292e9ddba812856cf92924Younes Manton * u = (yuyv >> 8 ) & 0xff 1456858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton * v = (yuyv >> 24 ) & 0xff 146e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 147e44c85637a3298918e292e9ddba812856cf92924Younes Manton 148e44c85637a3298918e292e9ddba812856cf92924Younes Manton#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 149e44c85637a3298918e292e9ddba812856cf92924Younes Manton /* 150e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Avoid shift with per-element count. 151e44c85637a3298918e292e9ddba812856cf92924Younes Manton * No support on x86, gets translated to roughly 5 instructions 152e44c85637a3298918e292e9ddba812856cf92924Younes Manton * per element. Didn't measure performance but cuts shader size 153e44c85637a3298918e292e9ddba812856cf92924Younes Manton * by quite a bit (less difference if cpu has no sse4.1 support). 154e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton */ 155e44c85637a3298918e292e9ddba812856cf92924Younes Manton if (util_cpu_caps.has_sse2 && n == 4) { 156e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef sel, tmp; 1578580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton struct lp_build_context bld32; 1588580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 159e44c85637a3298918e292e9ddba812856cf92924Younes Manton lp_build_context_init(&bld32, gallivm, type); 1606858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton 1616858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 162e44c85637a3298918e292e9ddba812856cf92924Younes Manton sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 1636858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton *y = lp_build_select(&bld32, sel, packed, tmp); 164d52d51ab8ae1240f77b6c18c3e99be4bf4868037Younes Manton } else 165e44c85637a3298918e292e9ddba812856cf92924Younes Manton#endif 
166e44c85637a3298918e292e9ddba812856cf92924Younes Manton { 167e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef shift; 168e44c85637a3298918e292e9ddba812856cf92924Younes Manton shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 169e44c85637a3298918e292e9ddba812856cf92924Younes Manton *y = LLVMBuildLShr(builder, packed, shift, ""); 170e44c85637a3298918e292e9ddba812856cf92924Younes Manton } 171e44c85637a3298918e292e9ddba812856cf92924Younes Manton 172e44c85637a3298918e292e9ddba812856cf92924Younes Manton *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 173e44c85637a3298918e292e9ddba812856cf92924Younes Manton *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); 174e44c85637a3298918e292e9ddba812856cf92924Younes Manton 1758580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton mask = lp_build_const_int_vec(gallivm, type, 0xff); 1768580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 177e44c85637a3298918e292e9ddba812856cf92924Younes Manton *y = LLVMBuildAnd(builder, *y, mask, "y"); 1786858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton *u = LLVMBuildAnd(builder, *u, mask, "u"); 1796858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton *v = LLVMBuildAnd(builder, *v, mask, "v"); 1803107b54b011c7ceef2b314632bdcf0b87c5e4d36Younes Manton} 181e44c85637a3298918e292e9ddba812856cf92924Younes Manton 182e44c85637a3298918e292e9ddba812856cf92924Younes Manton 183c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Mantonstatic INLINE void 184e44c85637a3298918e292e9ddba812856cf92924Younes Mantonyuv_to_rgb_soa(struct gallivm_state *gallivm, 1853d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König unsigned n, 1863d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, 1873d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) 1883d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König{ 
1893d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König LLVMBuilderRef builder = gallivm->builder; 1903d40d4f391e2fc319a03d8f171a2cfb9daf250c8Christian König struct lp_type type; 191e44c85637a3298918e292e9ddba812856cf92924Younes Manton struct lp_build_context bld; 192e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton 1931448e829e86981e6144410ba6a3d0f16357fb2b3Christian König LLVMValueRef c0; 194e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef c8; 19514766f820069ca987543918bce96410c481e5d20Christian König LLVMValueRef c16; 196e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef c128; 1978580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton LLVMValueRef c255; 1988580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 199e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef cy; 200e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef cug; 201e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef cub; 202e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef cvr; 203e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef cvg; 204e44c85637a3298918e292e9ddba812856cf92924Younes Manton 2055eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton memset(&type, 0, sizeof type); 2065eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton type.sign = TRUE; 207e44c85637a3298918e292e9ddba812856cf92924Younes Manton type.width = 32; 208e44c85637a3298918e292e9ddba812856cf92924Younes Manton type.length = n; 209e44c85637a3298918e292e9ddba812856cf92924Younes Manton 210e44c85637a3298918e292e9ddba812856cf92924Younes Manton lp_build_context_init(&bld, gallivm, type); 211e44c85637a3298918e292e9ddba812856cf92924Younes Manton 21270c44073ad3f333ed40c5c297a934a359c839e94Younes Manton assert(lp_check_value(type, y)); 21370c44073ad3f333ed40c5c297a934a359c839e94Younes Manton assert(lp_check_value(type, u)); 2148580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton assert(lp_check_value(type, v)); 
21570c44073ad3f333ed40c5c297a934a359c839e94Younes Manton 21670c44073ad3f333ed40c5c297a934a359c839e94Younes Manton /* 217fcdf50f74befad8d89eb3f9cdfd88b82d1daa98cChristian König * Constants 21842c7291d2cb50c2bd94dd9346a8402a24303d66dChristian König */ 21970c44073ad3f333ed40c5c297a934a359c839e94Younes Manton 22070c44073ad3f333ed40c5c297a934a359c839e94Younes Manton c0 = lp_build_const_int_vec(gallivm, type, 0); 2213cbe27a9888b94d1ab24b5e76ebd7563a7d8c6b8Christian König c8 = lp_build_const_int_vec(gallivm, type, 8); 2223cbe27a9888b94d1ab24b5e76ebd7563a7d8c6b8Christian König c16 = lp_build_const_int_vec(gallivm, type, 16); 22370c44073ad3f333ed40c5c297a934a359c839e94Younes Manton c128 = lp_build_const_int_vec(gallivm, type, 128); 22470c44073ad3f333ed40c5c297a934a359c839e94Younes Manton c255 = lp_build_const_int_vec(gallivm, type, 255); 22570c44073ad3f333ed40c5c297a934a359c839e94Younes Manton 226e44c85637a3298918e292e9ddba812856cf92924Younes Manton cy = lp_build_const_int_vec(gallivm, type, 298); 227e44c85637a3298918e292e9ddba812856cf92924Younes Manton cug = lp_build_const_int_vec(gallivm, type, -100); 228e44c85637a3298918e292e9ddba812856cf92924Younes Manton cub = lp_build_const_int_vec(gallivm, type, 516); 229e44c85637a3298918e292e9ddba812856cf92924Younes Manton cvr = lp_build_const_int_vec(gallivm, type, 409); 230e44c85637a3298918e292e9ddba812856cf92924Younes Manton cvg = lp_build_const_int_vec(gallivm, type, -208); 231e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton 232e44c85637a3298918e292e9ddba812856cf92924Younes Manton /* 233e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton * y -= 16; 2348580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton * u -= 128; 235e44c85637a3298918e292e9ddba812856cf92924Younes Manton * v -= 128; 236e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 237e44c85637a3298918e292e9ddba812856cf92924Younes Manton 238e44c85637a3298918e292e9ddba812856cf92924Younes Manton y = LLVMBuildSub(builder, y, c16, ""); 
2391448e829e86981e6144410ba6a3d0f16357fb2b3Christian König u = LLVMBuildSub(builder, u, c128, ""); 2401448e829e86981e6144410ba6a3d0f16357fb2b3Christian König v = LLVMBuildSub(builder, v, c128, ""); 2418580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 242e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton /* 243e44c85637a3298918e292e9ddba812856cf92924Younes Manton * r = 298 * _y + 409 * _v + 128; 244e44c85637a3298918e292e9ddba812856cf92924Younes Manton * g = 298 * _y - 100 * _u - 208 * _v + 128; 245e44c85637a3298918e292e9ddba812856cf92924Younes Manton * b = 298 * _y + 516 * _u + 128; 246e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 2471448e829e86981e6144410ba6a3d0f16357fb2b3Christian König 248ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König y = LLVMBuildMul(builder, y, cy, ""); 2491448e829e86981e6144410ba6a3d0f16357fb2b3Christian König y = LLVMBuildAdd(builder, y, c128, ""); 250ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König 251ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König *r = LLVMBuildMul(builder, v, cvr, ""); 2528c2bfa34a0d70ab08de44e3b091b3a097abbad97Christian König *g = LLVMBuildAdd(builder, 2538c2bfa34a0d70ab08de44e3b091b3a097abbad97Christian König LLVMBuildMul(builder, u, cug, ""), 254ea78480029450c019287c2a94d7c42a6a1d12dc3Christian König LLVMBuildMul(builder, v, cvg, ""), 255d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König ""); 256d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König *b = LLVMBuildMul(builder, u, cub, ""); 257d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König 2581448e829e86981e6144410ba6a3d0f16357fb2b3Christian König *r = LLVMBuildAdd(builder, *r, y, ""); 259d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König *g = LLVMBuildAdd(builder, *g, y, ""); 260d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König *b = LLVMBuildAdd(builder, *b, y, ""); 261d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König 262d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König /* 
263d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König * r >>= 8; 2641448e829e86981e6144410ba6a3d0f16357fb2b3Christian König * g >>= 8; 265e5f78a74f8294ee02015552db664dae1e7da9f47Christian König * b >>= 8; 266d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König */ 2671448e829e86981e6144410ba6a3d0f16357fb2b3Christian König 268e5f78a74f8294ee02015552db664dae1e7da9f47Christian König *r = LLVMBuildAShr(builder, *r, c8, "r"); 269e5f78a74f8294ee02015552db664dae1e7da9f47Christian König *g = LLVMBuildAShr(builder, *g, c8, "g"); 270e5f78a74f8294ee02015552db664dae1e7da9f47Christian König *b = LLVMBuildAShr(builder, *b, c8, "b"); 271e5f78a74f8294ee02015552db664dae1e7da9f47Christian König 272e5f78a74f8294ee02015552db664dae1e7da9f47Christian König /* 27332c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König * Clamp 27432c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König */ 27532c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König 27632c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König *r = lp_build_clamp(&bld, *r, c0, c255); 27732c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König *g = lp_build_clamp(&bld, *g, c0, c255); 27832c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König *b = lp_build_clamp(&bld, *b, c0, c255); 27932c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König} 28032c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König 28132c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König 28232c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian Königstatic LLVMValueRef 28332c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian Königrgb_to_rgba_aos(struct gallivm_state *gallivm, 284c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König unsigned n, 285c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) 286c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König{ 287c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König LLVMBuilderRef builder = gallivm->builder; 288c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian 
König struct lp_type type; 28962db9b21da6ccad6301feae9b90d53d46224c854Younes Manton LLVMValueRef a; 29062db9b21da6ccad6301feae9b90d53d46224c854Younes Manton LLVMValueRef rgba; 291c7b65dcaffeb9d0760c8ecad052f4c79297bfc8aChristian König 29214766f820069ca987543918bce96410c481e5d20Christian König memset(&type, 0, sizeof type); 29362db9b21da6ccad6301feae9b90d53d46224c854Younes Manton type.sign = TRUE; 29414766f820069ca987543918bce96410c481e5d20Christian König type.width = 32; 29562db9b21da6ccad6301feae9b90d53d46224c854Younes Manton type.length = n; 2961448e829e86981e6144410ba6a3d0f16357fb2b3Christian König 2971448e829e86981e6144410ba6a3d0f16357fb2b3Christian König assert(lp_check_value(type, r)); 2985eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton assert(lp_check_value(type, g)); 2995eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5Younes Manton assert(lp_check_value(type, b)); 300e44c85637a3298918e292e9ddba812856cf92924Younes Manton 301e44c85637a3298918e292e9ddba812856cf92924Younes Manton /* 302e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Make a 4 x unorm8 vector 303e44c85637a3298918e292e9ddba812856cf92924Younes Manton */ 304e44c85637a3298918e292e9ddba812856cf92924Younes Manton 305e44c85637a3298918e292e9ddba812856cf92924Younes Manton r = r; 306e44c85637a3298918e292e9ddba812856cf92924Younes Manton g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); 307e44c85637a3298918e292e9ddba812856cf92924Younes Manton b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); 308e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9feYounes Manton a = lp_build_const_int_vec(gallivm, type, 0xff000000); 309e44c85637a3298918e292e9ddba812856cf92924Younes Manton 310e44c85637a3298918e292e9ddba812856cf92924Younes Manton rgba = r; 3118580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton rgba = LLVMBuildOr(builder, rgba, g, ""); 3128580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton rgba = LLVMBuildOr(builder, rgba, b, ""); 
313e44c85637a3298918e292e9ddba812856cf92924Younes Manton rgba = LLVMBuildOr(builder, rgba, a, ""); 314c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton 315c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton rgba = LLVMBuildBitCast(builder, rgba, 3163107b54b011c7ceef2b314632bdcf0b87c5e4d36Younes Manton LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); 317e44c85637a3298918e292e9ddba812856cf92924Younes Manton 318c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton return rgba; 319e44c85637a3298918e292e9ddba812856cf92924Younes Manton} 3206858dd50c9b696c1c6044f5a403000f9d20b286bYounes Manton 3218580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 3228580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton/** 323e44c85637a3298918e292e9ddba812856cf92924Younes Manton * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS 324c11a7ec821d41b91a3825c5abfb4687c98b5bf98Younes Manton */ 325e44c85637a3298918e292e9ddba812856cf92924Younes Mantonstatic LLVMValueRef 326e44c85637a3298918e292e9ddba812856cf92924Younes Mantonuyvy_to_rgba_aos(struct gallivm_state *gallivm, 32790bd0e338d315c426c2d0255331610055023739eYounes Manton unsigned n, 328e44c85637a3298918e292e9ddba812856cf92924Younes Manton LLVMValueRef packed, 329d9ad3aa3b9647f1ede2568600978af956ff32fffChristian König LLVMValueRef i) 33032c4381d4a0479b3d9bfe305ce701be6b5ac8e18Christian König{ 331bd5fd67a3e3cda4b7676dd4745fc5d5524709210Christian König LLVMValueRef y, u, v; 3321448e829e86981e6144410ba6a3d0f16357fb2b3Christian König LLVMValueRef r, g, b; 3331448e829e86981e6144410ba6a3d0f16357fb2b3Christian König LLVMValueRef rgba; 334e44c85637a3298918e292e9ddba812856cf92924Younes Manton 335e44c85637a3298918e292e9ddba812856cf92924Younes Manton uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 33690bd0e338d315c426c2d0255331610055023739eYounes Manton yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 3378580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 
3388580b7a0eeed3fc29320b2c0a184084e4267661aYounes Manton 339e44c85637a3298918e292e9ddba812856cf92924Younes Manton return rgba; 34090bd0e338d315c426c2d0255331610055023739eYounes Manton} 341 342 343/** 344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS 345 */ 346static LLVMValueRef 347yuyv_to_rgba_aos(struct gallivm_state *gallivm, 348 unsigned n, 349 LLVMValueRef packed, 350 LLVMValueRef i) 351{ 352 LLVMValueRef y, u, v; 353 LLVMValueRef r, g, b; 354 LLVMValueRef rgba; 355 356 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 357 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 358 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 359 360 return rgba; 361} 362 363 364/** 365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS 366 */ 367static LLVMValueRef 368rgbg_to_rgba_aos(struct gallivm_state *gallivm, 369 unsigned n, 370 LLVMValueRef packed, 371 LLVMValueRef i) 372{ 373 LLVMValueRef r, g, b; 374 LLVMValueRef rgba; 375 376 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 377 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 378 379 return rgba; 380} 381 382 383/** 384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS 385 */ 386static LLVMValueRef 387grgb_to_rgba_aos(struct gallivm_state *gallivm, 388 unsigned n, 389 LLVMValueRef packed, 390 LLVMValueRef i) 391{ 392 LLVMValueRef r, g, b; 393 LLVMValueRef rgba; 394 395 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 396 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 397 398 return rgba; 399} 400 401/** 402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS 403 */ 404static LLVMValueRef 405grbr_to_rgba_aos(struct gallivm_state *gallivm, 406 unsigned n, 407 LLVMValueRef packed, 408 LLVMValueRef i) 409{ 410 LLVMValueRef r, g, b; 411 LLVMValueRef rgba; 412 413 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 414 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 415 416 return rgba; 417} 418 419 420/** 421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS 422 */ 423static 
LLVMValueRef 424rgrb_to_rgba_aos(struct gallivm_state *gallivm, 425 unsigned n, 426 LLVMValueRef packed, 427 LLVMValueRef i) 428{ 429 LLVMValueRef r, g, b; 430 LLVMValueRef rgba; 431 432 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 433 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 434 435 return rgba; 436} 437 438/** 439 * @param n is the number of pixels processed 440 * @param packed is a <n x i32> vector with the packed YUYV blocks 441 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 442 * @return a <4*n x i8> vector with the pixel RGBA values in AoS 443 */ 444LLVMValueRef 445lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, 446 const struct util_format_description *format_desc, 447 unsigned n, 448 LLVMValueRef base_ptr, 449 LLVMValueRef offset, 450 LLVMValueRef i, 451 LLVMValueRef j) 452{ 453 LLVMValueRef packed; 454 LLVMValueRef rgba; 455 456 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); 457 assert(format_desc->block.bits == 32); 458 assert(format_desc->block.width == 2); 459 assert(format_desc->block.height == 1); 460 461 packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset); 462 463 (void)j; 464 465 switch (format_desc->format) { 466 case PIPE_FORMAT_UYVY: 467 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); 468 break; 469 case PIPE_FORMAT_YUYV: 470 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); 471 break; 472 case PIPE_FORMAT_R8G8_B8G8_UNORM: 473 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); 474 break; 475 case PIPE_FORMAT_G8R8_G8B8_UNORM: 476 rgba = grgb_to_rgba_aos(gallivm, n, packed, i); 477 break; 478 case PIPE_FORMAT_G8R8_B8R8_UNORM: 479 rgba = grbr_to_rgba_aos(gallivm, n, packed, i); 480 break; 481 case PIPE_FORMAT_R8G8_R8B8_UNORM: 482 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i); 483 break; 484 default: 485 assert(0); 486 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); 487 break; 488 } 489 490 return rgba; 491} 492 493