t_vertex_sse.c revision dd4c1dd0382277b080fb4981e027250e10658ae8
1b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* 2b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Copyright 2003 Tungsten Graphics, inc. 3b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * All Rights Reserved. 4b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 5b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Permission is hereby granted, free of charge, to any person obtaining a 6b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * copy of this software and associated documentation files (the "Software"), 7b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * to deal in the Software without restriction, including without limitation 8b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * on the rights to use, copy, modify, merge, publish, distribute, sub 9b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * license, and/or sell copies of the Software, and to permit persons to whom 10b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * the Software is furnished to do so, subject to the following conditions: 11b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 12b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * The above copyright notice and this permission notice (including the next 13b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * paragraph) shall be included in all copies or substantial portions of the 14b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Software. 15b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 16b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL 19b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * USE OR OTHER DEALINGS IN THE SOFTWARE. 23b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 24b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Authors: 25b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Keith Whitwell <keithw@tungstengraphics.com> 26b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 27b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 28b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "glheader.h" 29b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "context.h" 30b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "colormac.h" 31b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "t_context.h" 32b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "t_vertex.h" 33b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "simple_list.h" 34b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 35b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <unistd.h> 36b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <sys/types.h> 37b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <sys/stat.h> 38b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <fcntl.h> 39b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 40b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define X 0 41b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define Y 1 42b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define Z 2 43b745bf08cd5e772f86360267995a96e9b73384b0Keith 
Whitwell#define W 3 44b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 45dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#define DISASSEM 1 46dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 47b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstruct x86_reg { 48b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint file:3; 49b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint idx:3; 50b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint mod:2; /* mod_REG if this is just a register */ 51b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLint disp:24; /* only +/- 23bits of offset - should be enough... */ 52b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 53b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 54b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstruct x86_program { 55b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLcontext *ctx; 56b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 57b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *store; 58b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *csr; 59b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 60b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint stack_offset; 61b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 62b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLboolean inputs_safe; 63b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLboolean outputs_safe; 64b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 65b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg identity; 66b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg vp0; 67b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg vp1; 68b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 69b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 70b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 71b745bf08cd5e772f86360267995a96e9b73384b0Keith 
/* Escape byte that introduces the two-byte opcodes. */
#define X86_TWOB 0x0f

/* Register files.  There are more but these are all we'll use: */
enum x86_reg_file {
   file_REG32 = 0,
   file_XMM   = 1
};

/* Values for the mod field of the modr/m byte. */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};

/* 32-bit general purpose registers.  The enumerator value is the
 * register number that gets added to opcodes (see emit_push()) and
 * placed in modr/m fields.
 */
enum x86_reg_name {
   reg_AX = 0,
   reg_CX = 1,
   reg_DX = 2,
   reg_BX = 3,
   reg_SP = 4,
   reg_BP = 5,
   reg_SI = 6,
   reg_DI = 7
};


101b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellenum x86_cc { 102b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_O, /* overflow */ 103b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_NO, /* not overflow */ 104b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_NAE, /* not above or equal / carry */ 105b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_AE, /* above or equal / not carry */ 106b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_E, /* equal / zero */ 107b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell cc_NE /* not equal / not zero */ 108b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 109b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 110b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define cc_Z cc_E 111b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#define cc_NZ cc_NE 112b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 113b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 114b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Create and manipulate registers and regmem values: 115b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 116b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_reg( GLuint file, 117b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint idx ) 118b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 119b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg; 120b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 121b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.file = file; 122b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.idx = idx; 123b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.mod = mod_REG; 124b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.disp = 0; 125b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 126b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return reg; 
127b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 128b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 129b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_disp( struct x86_reg reg, 130b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLint disp ) 131b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 132b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.file == file_REG32); 133b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 134b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (reg.mod == mod_REG) 135b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.disp = disp; 136b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else 137b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.disp += disp; 138b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 139b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (reg.disp == 0) 140b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.mod = mod_INDIRECT; 141b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else if (reg.disp <= 127 && reg.disp >= -128) 142b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.mod = mod_DISP8; 143b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else 144b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.mod = mod_DISP32; 145b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 146b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return reg; 147b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 148b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 149dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic struct x86_reg deref( struct x86_reg reg ) 150dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 151dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell return make_disp(reg, 0); 152dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 153dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 
154dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic struct x86_reg get_base_reg( struct x86_reg reg ) 155dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 156dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell return make_reg( reg.file, reg.idx ); 157dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 158dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 159dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 160b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Retreive a reference to one of the function arguments, taking into 161b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * account any push/pop activity: 162b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 163b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_fn_arg( struct x86_program *p, 164b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint arg ) 165b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 166b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return make_disp(make_reg(file_REG32, reg_SP), 167b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p->stack_offset + arg * 4); /* ??? 
*/ 168b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 169b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 170b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 171b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg get_identity( struct x86_program *p ) 172b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 173b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return p->identity; 174b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 175b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 176b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg get_sse_temp( struct x86_program *p ) 177b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 178b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return make_reg(file_XMM, 7); /* hardwired */ 179b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 180b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 181b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void release_temp( struct x86_program *p, 182b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg ) 183b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 184b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.file == file_XMM && 185b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell reg.idx == 7); 186b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 187b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 188b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Emit bytes to the instruction stream: 189b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 190b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_1b( struct x86_program *p, GLbyte b0 ) 191b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 192b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(GLbyte *)(p->csr++) = b0; 193b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 194b745bf08cd5e772f86360267995a96e9b73384b0Keith 
Whitwell 195dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_1i( struct x86_program *p, GLint i0 ) 196dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 197dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell *(GLint *)(p->csr) = i0; 198dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell p->csr += 4; 199dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 200dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 201dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void disassem( struct x86_program *p, const char *fn ) 202dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 203dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#if DISASSEM 204dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell static const char *last_fn; 205dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell if (fn && fn != last_fn) { 206dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell _mesa_printf("0x%x: %s\n", p->csr, fn); 207dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell last_fn = fn; 208dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell } 209dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#endif 210dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 211dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 212dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_1ub_fn( struct x86_program *p, GLubyte b0, const char *fn ) 213b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 214dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell disassem(p, fn); 215b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b0; 216b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 217b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 218dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_2ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, const char *fn ) 219b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 220dd4c1dd0382277b080fb4981e027250e10658ae8Keith 
Whitwell disassem(p, fn); 221b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b0; 222b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b1; 223b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 224b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 225dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_3ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn ) 226b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 227dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell disassem(p, fn); 228b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b0; 229b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b1; 230b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(p->csr++) = b2; 231b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 232b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 233dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#define emit_1ub(p, b0) emit_1ub_fn(p, b0, __FUNCTION__) 234dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#define emit_2ub(p, b0, b1) emit_2ub_fn(p, b0, b1, __FUNCTION__) 235dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__) 236b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 237b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 238b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Labels, jumps and fixup: 239b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 240b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLubyte *get_label( struct x86_program *p ) 241b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 242b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return p->csr; 243b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 244b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 245b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_jcc( 
struct x86_program *p, 246b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint cc, 247b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *label ) 248b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 249b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLint offset = label - (get_label(p) + 2); 250dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 251b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (offset <= 127 && offset >= -128) { 252b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0x70 + cc); 253b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1b(p, (GLbyte) offset); 254b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 255b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 256b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell offset = label - (get_label(p) + 5); 257b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, 0x0f, 0x80 + cc); 258b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1i(p, offset); 259b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 260b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 261b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 262b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Always use a 32bit offset for forward jumps: 263b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 264b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLubyte *emit_jcc_forward( struct x86_program *p, 265b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint cc ) 266b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 267b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, 0x0f, 0x80 + cc); 268b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1i(p, 0); 269b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return get_label(p); 270b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 271b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 
272b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Fixup offset from forward jump: 273b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 274b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void do_fixup( struct x86_program *p, 275b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *fixup ) 276b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 277b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *(int *)(fixup - 4) = get_label(p) - fixup; 278b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 279b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 280b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_push( struct x86_program *p, 281b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg ) 282b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 283b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.mod == mod_REG); 284b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0x50 + reg.idx); 285b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p->stack_offset += 4; 286b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 287b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 288b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pop( struct x86_program *p, 289b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg ) 290b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 291b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.mod == mod_REG); 292b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0x58 + reg.idx); 293b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p->stack_offset -= 4; 294b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 295b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 296b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_inc( struct x86_program *p, 297b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct 
x86_reg reg ) 298b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 299b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.mod == mod_REG); 300b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0x40 + reg.idx); 301b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 302b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 303b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_dec( struct x86_program *p, 304b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg ) 305b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 306b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.mod == mod_REG); 307dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub(p, 0x48 + reg.idx); 308b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 309b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 310b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_ret( struct x86_program *p ) 311b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 312b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0xc3); 313b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 314b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 315b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 316b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 317b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 318b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Build a modRM byte + possible displacement. No treatment of SIB 319b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * indexing. BZZT - no way to encode an absolute address. 
320b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 321b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_modrm( struct x86_program *p, 322b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg reg, 323b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg regmem ) 324b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 325b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte val = 0; 326b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 327b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(reg.mod == mod_REG); 328b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 329b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell val |= regmem.mod << 6; /* mod field */ 330b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell val |= reg.idx << 3; /* reg field */ 331b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell val |= regmem.idx; /* r/m field */ 332b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 333dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub_fn(p, val, 0); 334dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 335dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell /* Oh-oh we've stumbled into the SIB thing. 336dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell */ 337dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell if (regmem.idx == reg_SP) { 338dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub_fn(p, 0x24, 0); /* simplistic! 
*/ 339dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell } 340b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 341b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell switch (regmem.mod) { 342b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_REG: 343b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_INDIRECT: 344b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 345b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_DISP8: 346b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1b(p, regmem.disp); 347dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell break; 348b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_DISP32: 349b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1i(p, regmem.disp); 350dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell break; 351b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 352b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 353b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 354b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Many x86 instructions have two opcodes to cope with the situations 355b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * where the destination is a register or memory reference 356b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * respectively. This function selects the correct opcode based on 357b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * the arguments presented. 
358b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 359b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_op_modrm( struct x86_program *p, 360b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte op_dst_is_reg, 361b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte op_dst_is_mem, 362b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 363b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 364b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 365b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell switch (dst.mod) { 366b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_REG: 367dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub_fn(p, op_dst_is_reg, 0); 368b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm(p, dst, src); 369b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 370b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_INDIRECT: 371b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_DISP32: 372b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case mod_DISP8: 373b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell assert(src.mod == mod_REG); 374dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub_fn(p, op_dst_is_mem, 0); 375b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm(p, src, dst); 376b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 377b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 378b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 379b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 380b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_mov( struct x86_program *p, 381b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 382b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 383b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 
384b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x8b, 0x89, dst, src ); 385b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 386b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 387b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_xor( struct x86_program *p, 388b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 389b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 390b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 391b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x33, 0x31, dst, src ); 392b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 393b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 394dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_cmp( struct x86_program *p, 395dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg dst, 396dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg src ) 397dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 398dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_op_modrm( p, 0x3b, 0x39, dst, src ); 399dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 400dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 401b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movlps( struct x86_program *p, 402b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 403b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 404b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 405b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, X86_TWOB); 406b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x12, 0x13, dst, src ); 407b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 408b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 409b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movhps( struct 
x86_program *p, 410b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 411b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 412b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 413b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, X86_TWOB); 414b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x16, 0x17, dst, src ); 415b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 416b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 417b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movd( struct x86_program *p, 418b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 419b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 420b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 421b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, 0x66, X86_TWOB); 422b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 423b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 424b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 425b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movss( struct x86_program *p, 426b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 427b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 428b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 429b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, 0xF3, X86_TWOB); 430b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x10, 0x11, dst, src ); 431b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 432b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 433b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movaps( struct x86_program *p, 434b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 
435b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 436b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 437b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, X86_TWOB); 438b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x28, 0x29, dst, src ); 439b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 440b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 441b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movups( struct x86_program *p, 442b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 443b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 444b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 445b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, X86_TWOB); 446b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_op_modrm( p, 0x10, 0x11, dst, src ); 447b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 448b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 449b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* SSE operations often only have one format, with dest constrained to 450b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * be a register: 451b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 452b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_mulps( struct x86_program *p, 453b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 454b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 455b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 456b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, X86_TWOB, 0x59); 457b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 458b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 459b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 460b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void 
emit_addps( struct x86_program *p, 461b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 462b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 463b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 464b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_2ub(p, X86_TWOB, 0x58); 465b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 466b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 467b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 468b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_cvtps2dq( struct x86_program *p, 469b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 470b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 471b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 472b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_3ub(p, 0x66, X86_TWOB, 0x5B); 473b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 474b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 475b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 476b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_packssdw( struct x86_program *p, 477b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 478b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 479b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 480b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_3ub(p, 0x66, X86_TWOB, 0x6B); 481b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 482b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 483b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 484b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_packsswb( struct x86_program *p, 485b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 
486b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 487b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 488b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_3ub(p, 0x66, X86_TWOB, 0x63); 489b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 490b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 491b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 492dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_packuswb( struct x86_program *p, 493dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg dst, 494dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg src ) 495dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 496dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_3ub(p, 0x66, X86_TWOB, 0x67); 497dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_modrm( p, dst, src ); 498dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 499dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 500b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Load effective address: 501b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 502b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_lea( struct x86_program *p, 503b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 504b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src ) 505b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 506b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, 0x8d); 507b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm( p, dst, src ); 508b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 509b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 510b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_add_imm( struct x86_program *p, 511b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dst, 
512b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src, 513b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLint value ) 514b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 515b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_lea(p, dst, make_disp(src, value)); 516b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 517b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 518dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_test( struct x86_program *p, 519dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg dst, 520dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg src ) 521dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{ 522dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_1ub(p, 0x85); 523dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_modrm( p, dst, src ); 524dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell} 525dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 526b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 527b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 528b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 529b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/** 530b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Perform a reduced swizzle: 531b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 532b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pshufd( struct x86_program *p, 533b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 534b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0, 535b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte x, 536b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte y, 537b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte z, 538b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte w) 539b745bf08cd5e772f86360267995a96e9b73384b0Keith 
Whitwell{ 540b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_3ub(p, 0x66, X86_TWOB, 0x70); 541b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_modrm(p, dest, arg0); 542b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_1ub(p, (x|(y<<2)|(z<<4)|w<<6)); 543b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 544b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 545b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 546b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pk4ub( struct x86_program *p, 547b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 548b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 549b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 550b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_cvtps2dq(p, dest, arg0); 551b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_packssdw(p, dest, dest); 552dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_packuswb(p, dest, dest); 553b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 554b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 555b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_4( struct x86_program *p, 556b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 557b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 558b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 559b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, dest, arg0); 560b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 561b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 562b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_3( struct x86_program *p, 563b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 564b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 
565b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 566b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Have to jump through some hoops: 567b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 568b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 0 0 0 1 -- skip if reg[3] preserved over loop iterations 569b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * c 0 0 1 570b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 0 0 c 1 571b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * a b c 1 572b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 573b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, dest, get_identity(p)); 574b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, dest, make_disp(arg0, 8)); 575b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, dest, dest, Y,Z,X,W ); 576b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 577b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 578b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 579b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_2( struct x86_program *p, 580b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 581b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 582b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 583b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Pull in 2 dwords, then copy the top 2 dwords with 0,1 from id. 
584b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 585b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 586b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movhps(p, dest, get_identity(p)); 587b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 588b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 589b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_1( struct x86_program *p, 590b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 591b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 592b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 593b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Initialized with [0,0,0,1] from id, then pull in the single low 594b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * word. 595b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 596b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movaps(p, dest, get_identity(p)); 597b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, dest, arg0); 598b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 599b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 600b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 601b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 602b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_3( struct x86_program *p, 603b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 604b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 605b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 606b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Over-reads by 1 dword - potential SEGV... 
Deal with in 607b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * array_cache by treating size-3 arrays specially, copying to 608b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * temporary storage if last element (how can you tell?) falls on a 609b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 4k boundary. 610b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 611b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (p->inputs_safe) { 612b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movaps(p, dest, arg0); 613b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 614b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 615b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* c . . . 616b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * c c c c 617b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * a b c c 618b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 619b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, dest, make_disp(arg0, 8)); 620b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, dest, dest, X,X,X,X); 621b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 622b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 623b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 624b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 625b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_2( struct x86_program *p, 626b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 627b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 628b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 629b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_2(p, dest, arg0); 630b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 631b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 632b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic 
void emit_load3f_1( struct x86_program *p, 633b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 634b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 635b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 636b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_1(p, dest, arg0); 637b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 638b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 639b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_2( struct x86_program *p, 640b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 641b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 642b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 643b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 644b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 645b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 646b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_1( struct x86_program *p, 647b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 648b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 649b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 650b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_1(p, dest, arg0); 651b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 652b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 653b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load1f_1( struct x86_program *p, 654b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 655b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 656b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 657b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, dest, arg0); 658b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 
659b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 660b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*load[4][4])( struct x86_program *p, 661b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 662b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) = { 663b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load1f_1, 664b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1, 665b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1, 666b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load1f_1 }, 667b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 668b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load2f_1, 669b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2, 670b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2, 671b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load2f_2 }, 672b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 673b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load3f_1, 674b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load3f_2, 675b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load3f_3, 676b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load3f_3 }, 677b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 678b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell { emit_load4f_1, 679b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_2, 680b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_3, 681b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_load4f_4 } 682b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 683b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 684b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load( struct x86_program *p, 685dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg dest, 
686b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint sz, 687b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg src, 688b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint src_sz) 689b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 690dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell load[sz-1][src_sz-1](p, dest, src); 691b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 692b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 693b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 694b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store4f( struct x86_program *p, 695b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 696b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 697b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 698b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, dest, arg0); 699b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 700b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 701b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store3f( struct x86_program *p, 702b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 703b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 704b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 705b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (p->outputs_safe) { 706b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Emit the extra dword anyway. This may hurt writecombining, 707b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * may cause other problems. 
708b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 709b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, dest, arg0); 710b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 711b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 712b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Alternate strategy - emit two, shuffle, emit one. 713b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 714b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg tmp = get_sse_temp(p); 715b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 716b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 717b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, tmp, arg0, Z, Z, Z, Z ); 718b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, make_disp(dest,8), tmp); 719b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell release_temp(p, tmp); 720b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 721b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 722b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 723b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store2f( struct x86_program *p, 724b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 725b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 726b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 727b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movlps(p, dest, arg0); 728b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 729b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 730b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store1f( struct x86_program *p, 731b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 732b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) 733b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 
734b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movss(p, dest, arg0); 735b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 736b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 737b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 738b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*store[4])( struct x86_program *p, 739b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 740b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg arg0 ) = 741b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 742b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store1f, 743b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store2f, 744b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store3f, 745b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store4f 746b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}; 747b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 748b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store( struct x86_program *p, 749b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest, 750b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint sz, 751b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg temp ) 752b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 753b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 754b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell store[sz-1](p, dest, temp); 755b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 756b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 757b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 758b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLint get_offset( const void *a, const void *b ) 759b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 760b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return (const char *)b - (const char *)a; 
761b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 762b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 763b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 764b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 765b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Lots of hardcoding 766b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 767b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * EAX -- pointer to current output vertex 768b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * ECX -- pointer to current attribute 769b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * 770b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 771b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLboolean build_vertex_emit( struct x86_program *p ) 772b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 773b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLcontext *ctx = p->ctx; 774b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell TNLcontext *tnl = TNL_CONTEXT(ctx); 775b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); 776b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct tnl_clipspace_attr *a = vtx->attr; 777b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLuint j; 778b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 779b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg vertexEAX = make_reg(file_REG32, reg_AX); 780b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg srcEDI = make_reg(file_REG32, reg_CX); 781b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg countEBP = make_reg(file_REG32, reg_BP); 782b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg vtxESI = make_reg(file_REG32, reg_SI); 783b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg tmp = make_reg(file_XMM, 0); 784b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct 
x86_reg vp0 = make_reg(file_XMM, 1); 785b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg vp1 = make_reg(file_XMM, 2); 786dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell struct x86_reg chan0 = make_reg(file_XMM, 3); 787b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell GLubyte *fixup, *label; 788b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 789b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p->csr = p->store; 790b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 791b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Push a few regs? 792b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 793b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_push(p, srcEDI); 794b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_push(p, countEBP); 795b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_push(p, vtxESI); 796b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 797dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 798dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell /* Get vertex count, compare to zero 799dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell */ 800dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_xor(p, srcEDI, srcEDI); 801dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mov(p, countEBP, make_fn_arg(p, 2)); 802dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_cmp(p, countEBP, srcEDI); 803dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell fixup = emit_jcc_forward(p, cc_E); 804dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 805dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 806b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Initialize destination register. 
807b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 808b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_mov(p, vertexEAX, make_fn_arg(p, 3)); 809b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 810b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Dereference ctx to get tnl, then vtx: 811b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 812b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_mov(p, vtxESI, make_fn_arg(p, 1)); 813b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_mov(p, vtxESI, make_disp(vtxESI, get_offset(ctx, &ctx->swtnl_context))); 814b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell vtxESI = make_disp(vtxESI, get_offset(tnl, &tnl->clipspace)); 815b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 816b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 817b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Possibly load vp0, vp1 for viewport calcs: 818b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 819b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (vtx->need_viewport) { 820b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, vp0, make_disp(vtxESI, get_offset(vtx, &vtx->vp_scale[0]))); 821b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_movups(p, vp1, make_disp(vtxESI, get_offset(vtx, &vtx->vp_xlate[0]))); 822b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 823b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 824dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell /* always load, needed or not: 825dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell */ 826dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_movups(p, chan0, make_disp(vtxESI, get_offset(vtx, &vtx->chan_scale[0]))); 827dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell 828b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Note address for loop jump */ 829b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell label = 
get_label(p); 830b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 831b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Emit code for each of the attributes. Currently routes 832b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * everything through SSE registers, even when it might be more 833b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * efficient to stick with regular old x86. No optimization or 834b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * other tricks - enough new ground to cover here just getting 835b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * things working. 836b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 837b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell for (j = 0; j < vtx->attr_count; j++) { 838b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg dest = make_disp(vertexEAX, vtx->attr[j].vertoffset); 839b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_reg ptr_to_src = make_disp(vtxESI, get_offset(vtx, &vtx->attr[j].inputptr)); 840b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 841b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Load current a[j].inputptr 842b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 843b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_mov(p, srcEDI, ptr_to_src); 844b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 845b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Now, load an XMM reg from src, perhaps transform, then save. 
846b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Could be shortcircuited in specific cases: 847b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 848b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell switch (a[j].format) { 849b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_1F: 850dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 1, deref(srcEDI), vtx->attr[j].inputsize); 851b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 852b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_2F: 853dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 2, deref(srcEDI), vtx->attr[j].inputsize); 854b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 2, tmp); 855b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3F: 856b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Potentially the worst case - hardcode 2+1 copying: 857b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 858dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 3, deref(srcEDI), vtx->attr[j].inputsize); 859b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 3, tmp); 860b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4F: 861dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 862b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 4, tmp); 863b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 864b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_2F_VIEWPORT: 865dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 2, deref(srcEDI), vtx->attr[j].inputsize); 866dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, vp0); 867dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_addps(p, tmp, vp1); 
868b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 2, tmp); 869b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 870b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3F_VIEWPORT: 871dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 3, deref(srcEDI), vtx->attr[j].inputsize); 872dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, vp0); 873dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_addps(p, tmp, vp1); 874b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 3, tmp); 875b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 876b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4F_VIEWPORT: 877dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 878dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, vp0); 879dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_addps(p, tmp, vp1); 880b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 4, tmp); 881b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 882b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3F_XYW: 883dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 884b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, tmp, tmp, X, Y, W, Z); 885b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 3, tmp); 886b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 887b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 888b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Try and bond 3ub + 1ub pairs into a single 4ub operation? 
889b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 890b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_1UB_1F: 891b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3UB_3F_RGB: 892b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_3UB_3F_BGR: 893dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell _mesa_printf("non-implemneted format %d\n", a[j].format); 894b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return GL_FALSE; /* add this later */ 895b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 896b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_RGBA: 897dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 898dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, chan0); 899b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pk4ub(p, tmp, tmp); 900b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 901b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 902b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_BGRA: 903dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 904b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, tmp, tmp, Z, Y, X, W); 905dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, chan0); 906b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pk4ub(p, tmp, tmp); 907b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 908b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 909b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_ARGB: 910dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 911b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, tmp, tmp, W, X, Y, Z); 
912dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, chan0); 913b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pk4ub(p, tmp, tmp); 914b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 915b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 916b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4UB_4F_ABGR: 917dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 918b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pshufd(p, tmp, tmp, W, Z, Y, X); 919dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, chan0); 920b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pk4ub(p, tmp, tmp); 921b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 922b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 923b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case EMIT_4CHAN_4F_RGBA: 924b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell switch (CHAN_TYPE) { 925b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case GL_UNSIGNED_BYTE: 926dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 927dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_mulps(p, tmp, chan0); 928b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pk4ub(p, tmp, tmp); 929b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 1, tmp); 930b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 931b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell case GL_FLOAT: 932dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize); 933b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_store(p, dest, 4, tmp); 934b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell break; 
935dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell case GL_UNSIGNED_SHORT: 936b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell default: 937dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell _mesa_printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); 938dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell return GL_FALSE; 939b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 940dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell break; 941b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell default: 942dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell _mesa_printf("unknown a[%d].format %d\n", j, a[j].format); 943b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return GL_FALSE; /* catch any new opcodes */ 944b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 945b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 946b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* add a[j].inputstride (hardcoded value - could just as easily 947b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * pull the stride value from memory each time). 
948b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 949b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_add_imm(p, srcEDI, srcEDI, a[j].inputstride); 950b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 951b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* save new value of a[j].inputptr 952b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 953b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_mov(p, ptr_to_src, srcEDI); 954b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 955b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 956b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 957b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Next vertex: 958b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 959b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_add_imm(p, vertexEAX, vertexEAX, vtx->vertex_size); 960b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 961b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* decr count, loop if not zero 962b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 963b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_dec(p, countEBP); 964dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_test(p, countEBP, countEBP); 965b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_jcc(p, cc_NZ, label); 966b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 967b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Land forward jump here: 968b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 969b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell do_fixup(p, fixup); 970b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 971b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell /* Pop regs and return 972b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */ 973dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell emit_pop(p, get_base_reg(vtxESI)); 974b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 
emit_pop(p, countEBP); 975b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_pop(p, srcEDI); 976b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell emit_ret(p); 977b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 978b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell vtx->emit = (tnl_emit_func)p->store; 979b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell return GL_TRUE; 980b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 981b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 982b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellvoid _tnl_generate_sse_emit( GLcontext *ctx ) 983b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{ 984b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); 985b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell struct x86_program p; 986b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 987b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell memset(&p, 0, sizeof(p)); 988b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p.ctx = ctx; 989b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p.store = MALLOC(1024); 990b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 991b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p.inputs_safe = 1; /* for now */ 992b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell p.outputs_safe = 1; /* for now */ 993b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 994b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell if (build_vertex_emit(&p)) { 995b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell _tnl_register_fastpath( vtx, GL_TRUE ); 996dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell if (DISASSEM) 997dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell _mesa_printf("disassemble 0x%x 0x%x\n", p.store, p.csr); 998b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 999b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell else { 
1000b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell FREE(p.store); 1001b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell } 1002b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell 1003b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell (void)emit_movd; 1004b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell (void)emit_inc; 1005b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell (void)emit_xor; 1006b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell} 1007