/*
 * Copyright 2003 Tungsten Graphics, inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@tungstengraphics.com>
 */

#include "main/glheader.h"
#include "main/context.h"
#include "main/colormac.h"
#include "main/simple_list.h"
#include "main/enums.h"
#include "swrast/s_chan.h"
#include "t_context.h"
#include "t_vertex.h"

#if defined(USE_SSE_ASM)

#include "x86/rtasm/x86sse.h"
#include "x86/common_x86_asm.h"

43417cb2c1829f2119f6674987edac09c61d633b45Brian/** 44417cb2c1829f2119f6674987edac09c61d633b45Brian * Number of bytes to allocate for generated SSE functions 45417cb2c1829f2119f6674987edac09c61d633b45Brian */ 46417cb2c1829f2119f6674987edac09c61d633b45Brian#define MAX_SSE_CODE_SIZE 1024 47417cb2c1829f2119f6674987edac09c61d633b45Brian 48417cb2c1829f2119f6674987edac09c61d633b45Brian 49b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define X 0 50b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define Y 1 51b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define Z 2 52b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define W 3 53b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 54b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 55b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstruct x86_program { 561d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_function func; 57b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 58f9995b30756140724f41daf963fa06167912be7fKristian Høgsberg struct gl_context *ctx; 59b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLboolean inputs_safe; 60b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLboolean outputs_safe; 616040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell GLboolean have_sse2; 62b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 63b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg identity; 64c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct x86_reg chan0; 65b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 66b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 67b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 68b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg get_identity( struct x86_program *p ) 69b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 70b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return p->identity; 71b745bf08cd5e772f86360267995a96e9b73384b0Keith 
Whitwell} 72b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 73b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_4( struct x86_program *p, 74b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 75b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 76b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 771d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, dest, arg0); 78b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 79b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 80b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_3( struct x86_program *p, 81b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 82b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 83b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 84b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Have to jump through some hoops: 85b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 86c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * c 0 0 0 87b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * c 0 0 1 88b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 0 0 c 1 89c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * a b c 1 90b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 911d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); 92562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); 93562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); 941d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 95b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 96b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 97b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic 
void emit_load4f_2( struct x86_program *p, 98b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 99b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 100b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 101c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Initialize from identity, then pull in low two words: 102b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 1031d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, dest, get_identity(p)); 1041d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 105b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 106b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 107b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_1( struct x86_program *p, 108b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 109b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 110b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 111c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Pull in low word, then swizzle in identity */ 1121d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, arg0); 113562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); 114b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 115b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 116b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 117b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 118b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_3( struct x86_program *p, 119b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 120b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 121b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 1222b2bd08589099cb480b983835b01cc76a766a3c4Keith 
Whitwell /* Over-reads by 1 dword - potential SEGV if input is a vertex 1232b2bd08589099cb480b983835b01cc76a766a3c4Keith Whitwell * array. 124b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 125b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (p->inputs_safe) { 1261d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, dest, arg0); 127b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 128b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 129c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* c 0 0 0 130b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * c c c c 131b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * a b c c 132b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 1331d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); 134562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); 1351d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 136b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 137b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 138b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 139b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_2( struct x86_program *p, 140b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 141b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 142b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 143b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_2(p, dest, arg0); 144b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 145b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 146b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_1( struct x86_program *p, 147b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 
148b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 149b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 150b66495a0d915f5d5cc5ab50c843c9c1b296a5851Guillaume Melquiond /* Loading from memory erases the upper bits. */ 151b66495a0d915f5d5cc5ab50c843c9c1b296a5851Guillaume Melquiond sse_movss(&p->func, dest, arg0); 152b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 153b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 154b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_2( struct x86_program *p, 155b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 156b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 157b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 1581d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 159b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 160b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 161b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_1( struct x86_program *p, 162b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 163b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 164b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 165b66495a0d915f5d5cc5ab50c843c9c1b296a5851Guillaume Melquiond /* Loading from memory erases the upper bits. 
*/ 166b66495a0d915f5d5cc5ab50c843c9c1b296a5851Guillaume Melquiond sse_movss(&p->func, dest, arg0); 167b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 168b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 169b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load1f_1( struct x86_program *p, 170b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 171b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 172b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 1731d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, arg0); 174b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 175b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 176b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*load[4][4])( struct x86_program *p, 177b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 178b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) = { 179b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load1f_1, 180b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1, 181b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1, 182b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1 }, 183b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 184b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load2f_1, 185b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2, 186b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2, 187b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2 }, 188b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 189b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load3f_1, 190b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load3f_2, 191b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load3f_3, 192b745bf08cd5e772f86360267995a96e9b73384b0Keith 
Whitwell emit_load3f_3 }, 193b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 194b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load4f_1, 195b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_2, 196b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_3, 197b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_4 } 198b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 199b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 200b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load( struct x86_program *p, 201dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg dest, 202b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint sz, 203b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src, 204b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint src_sz) 205b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 206dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell load[sz-1][src_sz-1](p, dest, src); 207b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 208b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 209b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store4f( struct x86_program *p, 210b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 211b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 212b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 2131d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, dest, arg0); 214b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 215b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 216b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store3f( struct x86_program *p, 217b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 218b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 
219b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 220b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (p->outputs_safe) { 221b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Emit the extra dword anyway. This may hurt writecombining, 222b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * may cause other problems. 223b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 2241d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, dest, arg0); 225b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 226b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 227b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Alternate strategy - emit two, shuffle, emit one. 228b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 2291d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 230562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! 
destructive */ 2311d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, x86_make_disp(dest,8), arg0); 232b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 233b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 234b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 235b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store2f( struct x86_program *p, 236b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 237b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 238b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 2391d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movlps(&p->func, dest, arg0); 240b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 241b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 242b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store1f( struct x86_program *p, 243b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 244b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 245b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 2461d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, arg0); 247b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 248b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 249b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 250b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*store[4])( struct x86_program *p, 251b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 252b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) = 253b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 254b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store1f, 255b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store2f, 256b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store3f, 
257b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store4f 258b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 259b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 260b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store( struct x86_program *p, 261b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 262b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint sz, 263b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg temp ) 264b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 265b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 266b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell store[sz-1](p, dest, temp); 267b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 268b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 2696040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwellstatic void emit_pack_store_4ub( struct x86_program *p, 2706040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell struct x86_reg dest, 2716040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell struct x86_reg temp ) 2726040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell{ 273c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Scale by 255.0 274c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 2751d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_mulps(&p->func, temp, p->chan0); 276c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 2776040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell if (p->have_sse2) { 2781d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse2_cvtps2dq(&p->func, temp, temp); 2791d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse2_packssdw(&p->func, temp, temp); 2801d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse2_packuswb(&p->func, temp, temp); 2811d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, dest, temp); 2826040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell } 
2836040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell else { 2841d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); 2851d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); 2861d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_cvtps2pi(&p->func, mmx0, temp); 2871d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movhlps(&p->func, temp, temp); 2881d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_cvtps2pi(&p->func, mmx1, temp); 2891d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell mmx_packssdw(&p->func, mmx0, mmx1); 2901d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell mmx_packuswb(&p->func, mmx0, mmx0); 2911d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell mmx_movd(&p->func, dest, mmx0); 2926040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell } 2936040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell} 294b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 295b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLint get_offset( const void *a, const void *b ) 296b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 297b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return (const char *)b - (const char *)a; 298b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 299b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 300c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell/* Not much happens here. Eventually use this function to try and 301c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * avoid saving/reloading the source pointers each vertex (if some of 302c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * them can fit in registers). 
303c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 304c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwellstatic void get_src_ptr( struct x86_program *p, 305c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct x86_reg srcREG, 306c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct x86_reg vtxREG, 307c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct tnl_clipspace_attr *a ) 308c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell{ 309c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(p->ctx); 3101d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg ptr_to_src = x86_make_disp(vtxREG, get_offset(vtx, &a->inputptr)); 311c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 312c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Load current a[j].inputptr 313c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 3141d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, srcREG, ptr_to_src); 315c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell} 316c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 317c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwellstatic void update_src_ptr( struct x86_program *p, 318c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct x86_reg srcREG, 319c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct x86_reg vtxREG, 320c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct tnl_clipspace_attr *a ) 321c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell{ 322c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (a->inputstride) { 323c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(p->ctx); 3241d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg ptr_to_src = x86_make_disp(vtxREG, get_offset(vtx, &a->inputptr)); 325c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 
326c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* add a[j].inputstride (hardcoded value - could just as easily 327c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * pull the stride value from memory each time). 328c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 3291d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); 330c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 331c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* save new value of a[j].inputptr 332c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 3331d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, ptr_to_src, srcREG); 334c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 335c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell} 336b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 337b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 338b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Lots of hardcoding 339b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 340b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * EAX -- pointer to current output vertex 341b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * ECX -- pointer to current attribute 342b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 343b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 344b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLboolean build_vertex_emit( struct x86_program *p ) 345b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 346f9995b30756140724f41daf963fa06167912be7fKristian Høgsberg struct gl_context *ctx = p->ctx; 347b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell TNLcontext *tnl = TNL_CONTEXT(ctx); 348b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); 349c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell GLuint j = 0; 
350b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 3511d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); 3521d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); 3531d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); 3541d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg vtxESI = x86_make_reg(file_REG32, reg_SI); 3551d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg temp = x86_make_reg(file_XMM, 0); 3561d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg vp0 = x86_make_reg(file_XMM, 1); 3571d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg vp1 = x86_make_reg(file_XMM, 2); 3583b9bc821e1dfe39905585746166183264c335416Guillaume Melquiond struct x86_reg temp2 = x86_make_reg(file_XMM, 3); 359b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *fixup, *label; 360b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 361b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Push a few regs? 
362b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 3631d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_push(&p->func, countEBP); 3641d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_push(&p->func, vtxESI); 365b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 366dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 367dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell /* Get vertex count, compare to zero 368dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell */ 3691d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_xor(&p->func, srcECX, srcECX); 3701d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); 3711d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_cmp(&p->func, countEBP, srcECX); 3721d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell fixup = x86_jcc_forward(&p->func, cc_E); 373dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 374b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Initialize destination register. 
375b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 3761d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); 377b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 378b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Dereference ctx to get tnl, then vtx: 379b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 3801d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, vtxESI, x86_fn_arg(&p->func, 1)); 3811d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_mov(&p->func, vtxESI, x86_make_disp(vtxESI, get_offset(ctx, &ctx->swtnl_context))); 3821d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell vtxESI = x86_make_disp(vtxESI, get_offset(tnl, &tnl->clipspace)); 383b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 384b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 385b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Possibly load vp0, vp1 for viewport calcs: 386b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 387b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (vtx->need_viewport) { 3881d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, vp0, x86_make_disp(vtxESI, get_offset(vtx, &vtx->vp_scale[0]))); 3891d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, vp1, x86_make_disp(vtxESI, get_offset(vtx, &vtx->vp_xlate[0]))); 390b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 391b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 392dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell /* always load, needed or not: 393dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell */ 3941d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, p->chan0, x86_make_disp(vtxESI, get_offset(vtx, &vtx->chan_scale[0]))); 3951d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movups(&p->func, p->identity, x86_make_disp(vtxESI, get_offset(vtx, &vtx->identity[0]))); 
396dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 397b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Note address for loop jump */ 3981d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell label = x86_get_label(&p->func); 399b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 400b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Emit code for each of the attributes. Currently routes 401b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * everything through SSE registers, even when it might be more 402b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * efficient to stick with regular old x86. No optimization or 403b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * other tricks - enough new ground to cover here just getting 404b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * things working. 405b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 406c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell while (j < vtx->attr_count) { 407c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell struct tnl_clipspace_attr *a = &vtx->attr[j]; 4081d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); 409b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 410b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Now, load an XMM reg from src, perhaps transform, then save. 
411b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Could be shortcircuited in specific cases: 412b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 413c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell switch (a->format) { 414b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_1F: 415c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4161d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); 4176040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 1, temp); 418c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 419dc7fc173966e314f89502473044933a099c838aeKeith Whitwell break; 420b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_2F: 421c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4221d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); 4236040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 2, temp); 424c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 425dc7fc173966e314f89502473044933a099c838aeKeith Whitwell break; 426b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3F: 427b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Potentially the worst case - hardcode 2+1 copying: 428b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 429c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (0) { 430c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4311d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); 432c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell emit_store(p, dest, 3, temp); 433c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, 
a); 434c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 435c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else { 436c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4371d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); 438c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell emit_store(p, dest, 2, temp); 439c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (a->inputsize > 2) { 4401d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); 4411d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_store(p, x86_make_disp(dest,8), 1, temp); 442c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 443c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else { 4441d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); 445c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 446c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 447c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 448dc7fc173966e314f89502473044933a099c838aeKeith Whitwell break; 449b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4F: 450c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4511d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 4526040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 4, temp); 453c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 454b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 455b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_2F_VIEWPORT: 456c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4571d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith 
Whitwell emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); 4581d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_mulps(&p->func, temp, vp0); 4591d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_addps(&p->func, temp, vp1); 4606040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 2, temp); 461c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 462b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 463b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3F_VIEWPORT: 464c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4651d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); 4661d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_mulps(&p->func, temp, vp0); 4671d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_addps(&p->func, temp, vp1); 4686040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 3, temp); 469c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 470b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 471b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4F_VIEWPORT: 472c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4731d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 4741d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_mulps(&p->func, temp, vp0); 4751d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell sse_addps(&p->func, temp, vp1); 4766040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 4, temp); 477c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 478b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 479b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case 
EMIT_3F_XYW: 480c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4811d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 482562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); 4836040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 3, temp); 484c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 485b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 486b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 487b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_1UB_1F: 488c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Test for PAD3 + 1UB: 489c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 490c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (j > 0 && 491c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) 492c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell { 493c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 4941d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); 495562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); 4961d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! 
*/ 497c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 498c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 499c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else { 500298be2b028263b2c343a707662c6fbfa18293cb2Kristian Høgsberg printf("Can't emit 1ub %x %x %d\n", a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); 501c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell return GL_FALSE; 502c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 503c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell break; 504b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3UB_3F_RGB: 505b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3UB_3F_BGR: 506c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Test for 3UB + PAD1: 507c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 508c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (j == vtx->attr_count - 1 || 509c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell a[1].vertoffset >= a->vertoffset + 4) { 510c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5111d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); 512c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (a->format == EMIT_3UB_3F_BGR) 513562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); 514c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell emit_pack_store_4ub(p, dest, temp); 515c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 516c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 517c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Test for 3UB + 1UB: 518c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 519c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else if (j < vtx->attr_count - 1 && 
520c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell a[1].format == EMIT_1UB_1F && 521c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell a[1].vertoffset == a->vertoffset + 3) { 522c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5231d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); 524c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 525c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 526c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Make room for incoming value: 527c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 528562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); 529c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 530c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, &a[1]); 5313b9bc821e1dfe39905585746166183264c335416Guillaume Melquiond emit_load(p, temp2, 1, x86_deref(srcECX), a[1].inputsize); 5323b9bc821e1dfe39905585746166183264c335416Guillaume Melquiond sse_movss(&p->func, temp, temp2); 533c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, &a[1]); 534c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 535c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Rearrange and possibly do BGR conversion: 536c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell */ 537c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell if (a->format == EMIT_3UB_3F_BGR) 538562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); 539c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else 540562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); 541c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 542c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 
emit_pack_store_4ub(p, dest, temp); 543c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell j++; /* NOTE: two attrs consumed */ 544c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 545c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell else { 546298be2b028263b2c343a707662c6fbfa18293cb2Kristian Høgsberg printf("Can't emit 3ub\n"); 5476e29a3c8e2dc920b6216a0df6357abd8234f1ec4Guillaume Melquiond return GL_FALSE; /* add this later */ 548c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell } 549c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell break; 550b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 551b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_RGBA: 552c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5531d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 5546040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_pack_store_4ub(p, dest, temp); 555c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 556b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 557b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_BGRA: 558c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5591d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 560562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); 5616040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_pack_store_4ub(p, dest, temp); 562c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 563b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 564b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_ARGB: 565c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 
5661d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 567562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); 5686040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_pack_store_4ub(p, dest, temp); 569c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 570b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 571b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_ABGR: 572c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5731d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 574562bdaf70e31d294a47f9e3bbf3be35288653f67Keith Whitwell sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); 5756040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_pack_store_4ub(p, dest, temp); 576c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 577b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 578b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4CHAN_4F_RGBA: 579b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell switch (CHAN_TYPE) { 580b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case GL_UNSIGNED_BYTE: 581c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 5821d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 5836040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_pack_store_4ub(p, dest, temp); 584c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 585b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 586b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case GL_FLOAT: 587c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell get_src_ptr(p, srcECX, vtxESI, a); 
5881d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); 5896040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell emit_store(p, dest, 4, temp); 590c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell update_src_ptr(p, srcECX, vtxESI, a); 591b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 592dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell case GL_UNSIGNED_SHORT: 593b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell default: 594298be2b028263b2c343a707662c6fbfa18293cb2Kristian Høgsberg printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); 595dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell return GL_FALSE; 596b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 597dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell break; 598b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell default: 599298be2b028263b2c343a707662c6fbfa18293cb2Kristian Høgsberg printf("unknown a[%d].format %d\n", j, a->format); 600b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return GL_FALSE; /* catch any new opcodes */ 601b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 602b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 603c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Increment j by at least 1 - may have been incremented above also: 604b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 605c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell j++; 606b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 607b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 608b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Next vertex: 609b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 6101d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vtx->vertex_size)); 611b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 612b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* 
decr count, loop if not zero 613b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 6141d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_dec(&p->func, countEBP); 6151d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_test(&p->func, countEBP, countEBP); 6161d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_jcc(&p->func, cc_NZ, label); 6176040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell 6186040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell /* Exit mmx state? 6196040d34109cdca111df3ed86f9854ddb6d90525cKeith Whitwell */ 6201d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell if (p->func.need_emms) 6211d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell mmx_emms(&p->func); 622b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 623b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Land forward jump here: 624b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 6251d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_fixup_fwd_jump(&p->func, fixup); 626b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 627b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Pop regs and return 628b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 6291d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_pop(&p->func, x86_get_base_reg(vtxESI)); 6301d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_pop(&p->func, countEBP); 6311d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_ret(&p->func); 632b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 633417cb2c1829f2119f6674987edac09c61d633b45Brian assert(!vtx->emit); 6341d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell vtx->emit = (tnl_emit_func)x86_get_func(&p->func); 635417cb2c1829f2119f6674987edac09c61d633b45Brian 636417cb2c1829f2119f6674987edac09c61d633b45Brian assert( (char *) p->func.csr - (char *) p->func.store <= MAX_SSE_CODE_SIZE ); 637b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return GL_TRUE; 
638b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 639b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 64018a74321aa825c355392f98f1563a971871794ccKeith Whitwell 64118a74321aa825c355392f98f1563a971871794ccKeith Whitwell 642f9995b30756140724f41daf963fa06167912be7fKristian Høgsbergvoid _tnl_generate_sse_emit( struct gl_context *ctx ) 643b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 644b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); 64518a74321aa825c355392f98f1563a971871794ccKeith Whitwell struct x86_program p; 64618a74321aa825c355392f98f1563a971871794ccKeith Whitwell 64718a74321aa825c355392f98f1563a971871794ccKeith Whitwell if (!cpu_has_xmm) { 64818a74321aa825c355392f98f1563a971871794ccKeith Whitwell vtx->codegen_emit = NULL; 64918a74321aa825c355392f98f1563a971871794ccKeith Whitwell return; 65018a74321aa825c355392f98f1563a971871794ccKeith Whitwell } 651b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 65226f8fad1456fdc2b352cea9d3b4c32cb5f6ae947Kenneth Graunke memset(&p, 0, sizeof(p)); 653b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 6541d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell p.ctx = ctx; 65518a74321aa825c355392f98f1563a971871794ccKeith Whitwell p.inputs_safe = 0; /* for now */ 656a3b5ae783e18040349f1736c2ad812e0ec24a7b8Alan Hourihane p.outputs_safe = 0; /* for now */ 65718a74321aa825c355392f98f1563a971871794ccKeith Whitwell p.have_sse2 = cpu_has_xmm2; 6581d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell p.identity = x86_make_reg(file_XMM, 6); 6591d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell p.chan0 = x86_make_reg(file_XMM, 7); 6601d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell 66129054676a094c59b70cfec8b05da11741e53722cBrian if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) { 662417cb2c1829f2119f6674987edac09c61d633b45Brian vtx->emit = NULL; 663417cb2c1829f2119f6674987edac09c61d633b45Brian return; 
664417cb2c1829f2119f6674987edac09c61d633b45Brian } 665c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell 666b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (build_vertex_emit(&p)) { 667b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell _tnl_register_fastpath( vtx, GL_TRUE ); 668b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 669b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 670c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell /* Note the failure so that we don't keep trying to codegen an 671c2745ffa49e25aa2ff685ee8538a79baad4de54fKeith Whitwell * impossible state: 6722b2bd08589099cb480b983835b01cc76a766a3c4Keith Whitwell */ 6732b2bd08589099cb480b983835b01cc76a766a3c4Keith Whitwell _tnl_register_fastpath( vtx, GL_FALSE ); 6741d60469c4c0bcccd8b3310a49d18123a8c7f41e4Keith Whitwell x86_release_func(&p.func); 675b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 676b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 67718a74321aa825c355392f98f1563a971871794ccKeith Whitwell 67818a74321aa825c355392f98f1563a971871794ccKeith Whitwell#else 67918a74321aa825c355392f98f1563a971871794ccKeith Whitwell 680f9995b30756140724f41daf963fa06167912be7fKristian Høgsbergvoid _tnl_generate_sse_emit( struct gl_context *ctx ) 68118a74321aa825c355392f98f1563a971871794ccKeith Whitwell{ 68218a74321aa825c355392f98f1563a971871794ccKeith Whitwell /* Dummy version for when USE_SSE_ASM not defined */ 68318a74321aa825c355392f98f1563a971871794ccKeith Whitwell} 68418a74321aa825c355392f98f1563a971871794ccKeith Whitwell 68518a74321aa825c355392f98f1563a971871794ccKeith Whitwell#endif 686