1/************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 **************************************************************************/ 23 24#ifndef _RTASM_X86SSE_H_ 25#define _RTASM_X86SSE_H_ 26 27#include "pipe/p_compiler.h" 28#include "pipe/p_config.h" 29 30#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 31 32/* It is up to the caller to ensure that instructions issued are 33 * suitable for the host cpu. There are no checks made in this module 34 * for mmx/sse/sse2 support on the cpu. 35 */ 36struct x86_reg { 37 unsigned file:2; 38 unsigned idx:4; 39 unsigned mod:2; /* mod_REG if this is just a register */ 40 int disp:24; /* only +/- 23bits of offset - should be enough... */ 41}; 42 43#define X86_MMX 1 44#define X86_MMX2 2 45#define X86_SSE 4 46#define X86_SSE2 8 47#define X86_SSE3 0x10 48#define X86_SSE4_1 0x20 49 50struct x86_function { 51 unsigned caps; 52 unsigned size; 53 unsigned char *store; 54 unsigned char *csr; 55 56 unsigned stack_offset:16; 57 unsigned need_emms:8; 58 int x87_stack:8; 59 60 unsigned char error_overflow[4]; 61}; 62 63enum x86_reg_file { 64 file_REG32, 65 file_MMX, 66 file_XMM, 67 file_x87 68}; 69 70/* Values for mod field of modr/m byte 71 */ 72enum x86_reg_mod { 73 mod_INDIRECT, 74 mod_DISP8, 75 mod_DISP32, 76 mod_REG 77}; 78 79enum x86_reg_name { 80 reg_AX, 81 reg_CX, 82 reg_DX, 83 reg_BX, 84 reg_SP, 85 reg_BP, 86 reg_SI, 87 reg_DI, 88 reg_R8, 89 reg_R9, 90 reg_R10, 91 reg_R11, 92 reg_R12, 93 reg_R13, 94 reg_R14, 95 reg_R15 96}; 97 98 99enum x86_cc { 100 cc_O, /* overflow */ 101 cc_NO, /* not overflow */ 102 cc_NAE, /* not above or equal / carry */ 103 cc_AE, /* above or equal / not carry */ 104 cc_E, /* equal / zero */ 105 cc_NE /* not equal / not zero */ 106}; 107 108enum sse_cc { 109 cc_Equal, 110 cc_LessThan, 111 cc_LessThanEqual, 112 cc_Unordered, 113 cc_NotEqual, 114 cc_NotLessThan, 115 cc_NotLessThanEqual, 116 cc_Ordered 117}; 118 119#define cc_Z cc_E 120#define cc_NZ cc_NE 121 122 123/** generic pointer to function */ 124typedef void (*x86_func)(void); 125 126 127/* Begin/end/retrieve function creation: 128 */ 129 130enum x86_target 131{ 132 X86_32, 133 X86_64_STD_ABI, 134 X86_64_WIN64_ABI 135}; 136 137/* make this read a member of x86_function if target != host is desired */ 138static INLINE enum x86_target x86_target( struct x86_function* p ) 139{ 140#ifdef PIPE_ARCH_X86 141 return X86_32; 142#elif defined(_WIN64) 143 return X86_64_WIN64_ABI; 144#elif defined(PIPE_ARCH_X86_64) 145 return X86_64_STD_ABI; 146#endif 147} 148 149static INLINE unsigned x86_target_caps( struct x86_function* p ) 150{ 151 return p->caps; 152} 153 154void x86_init_func( struct x86_function *p ); 155void x86_init_func_size( struct x86_function *p, unsigned code_size ); 156void x86_release_func( struct x86_function *p ); 157x86_func x86_get_func( struct x86_function *p ); 158 159/* Debugging: 160 */ 161void x86_print_reg( struct x86_reg reg ); 162 163 164/* Create and manipulate registers and regmem values: 165 */ 166struct x86_reg x86_make_reg( enum x86_reg_file file, 167 enum x86_reg_name idx ); 168 169struct x86_reg x86_make_disp( struct x86_reg reg, 170 int disp ); 171 172struct x86_reg x86_deref( struct x86_reg reg ); 173 174struct x86_reg x86_get_base_reg( struct x86_reg reg ); 175 176 177/* Labels, jumps and fixup: 178 */ 179int x86_get_label( struct x86_function *p ); 180 181void x64_rexw(struct x86_function *p); 182 183void x86_jcc( struct x86_function *p, 184 enum x86_cc cc, 185 int label ); 186 187int x86_jcc_forward( struct x86_function *p, 188 enum x86_cc cc ); 189 190int x86_jmp_forward( struct x86_function *p); 191 192int x86_call_forward( struct x86_function *p); 193 194void x86_fixup_fwd_jump( struct x86_function *p, 195 int fixup ); 196 197void x86_jmp( struct x86_function *p, int label ); 198 199/* void x86_call( struct x86_function *p, void (*label)() ); */ 200void x86_call( struct x86_function *p, struct x86_reg reg); 201 202void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); 203void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); 204void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); 205void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); 206void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); 207void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); 208void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); 209 210 211/* Macro for sse_shufps() and sse2_pshufd(): 212 */ 213#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) 214#define SHUF_NOOP RSW(0,1,2,3) 215#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 216 217void mmx_emms( struct x86_function *p ); 218void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 219void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 220void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 221void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 222 223void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 224void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 225void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 226void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 227void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 228void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 229void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 230 231void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 232void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 233void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 234void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 235void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 236 237void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 238void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 239void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 240void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 241void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 242 unsigned char shuf ); 243void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 244 unsigned char shuf ); 245void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 246 unsigned char shuf ); 247void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 248void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 249 250void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 251void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 252void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 253void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 254 255void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 256void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 257void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 258 259void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 260void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 261void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 262 263void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 264void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 265 266void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 267 268void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 269void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 270void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 271 272void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); 273void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); 274void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); 275 276void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 277 278void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 279void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 280void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 281void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 282void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 283void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 284void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, 285 enum sse_cc cc ); 286void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 287void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 288void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 289void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 290void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 291void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 292void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 293void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 294void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 295void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 296void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 297void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 298void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 299void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 300void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 301void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 302void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 303void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 304 unsigned char shuf ); 305void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 306void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 307void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); 308void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 309 310void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 311void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 312void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); 313void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 314void x86_dec( struct x86_function *p, struct x86_reg reg ); 315void x86_inc( struct x86_function *p, struct x86_reg reg ); 316void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 317void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 318void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 319void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 320void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 321void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 322void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 323void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); 324void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); 325void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); 326void x86_mul( struct x86_function *p, struct x86_reg src ); 327void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 328void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 329void x86_pop( struct x86_function *p, struct x86_reg reg ); 330void x86_push( struct x86_function *p, struct x86_reg reg ); 331void x86_push_imm32( struct x86_function *p, int imm ); 332void x86_ret( struct x86_function *p ); 333void x86_retw( struct x86_function *p, unsigned short imm ); 334void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 335void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 336void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 337void x86_sahf( struct x86_function *p ); 338void x86_div( struct x86_function *p, struct x86_reg src ); 339void x86_bswap( struct x86_function *p, struct x86_reg src ); 340void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 341void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 342void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 343 344void x86_cdecl_caller_push_regs( struct x86_function *p ); 345void x86_cdecl_caller_pop_regs( struct x86_function *p ); 346 347void x87_assert_stack_empty( struct x86_function *p ); 348 349void x87_f2xm1( struct x86_function *p ); 350void x87_fabs( struct x86_function *p ); 351void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 352void x87_faddp( struct x86_function *p, struct x86_reg dst ); 353void x87_fchs( struct x86_function *p ); 354void x87_fclex( struct x86_function *p ); 355void x87_fcmovb( struct x86_function *p, struct x86_reg src ); 356void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); 357void x87_fcmove( struct x86_function *p, struct x86_reg src ); 358void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); 359void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); 360void x87_fcmovne( struct x86_function *p, struct x86_reg src ); 361void x87_fcom( struct x86_function *p, struct x86_reg dst ); 362void x87_fcomi( struct x86_function *p, struct x86_reg dst ); 363void x87_fcomip( struct x86_function *p, struct x86_reg dst ); 364void x87_fcomp( struct x86_function *p, struct x86_reg dst ); 365void x87_fcos( struct x86_function *p ); 366void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 367void x87_fdivp( struct x86_function *p, struct x86_reg dst ); 368void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 369void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); 370void x87_fild( struct x86_function *p, struct x86_reg arg ); 371void x87_fist( struct x86_function *p, struct x86_reg dst ); 372void x87_fistp( struct x86_function *p, struct x86_reg dst ); 373void x87_fld( struct x86_function *p, struct x86_reg arg ); 374void x87_fld1( struct x86_function *p ); 375void x87_fldcw( struct x86_function *p, struct x86_reg arg ); 376void x87_fldl2e( struct x86_function *p ); 377void x87_fldln2( struct x86_function *p ); 378void x87_fldz( struct x86_function *p ); 379void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 380void x87_fmulp( struct x86_function *p, struct x86_reg dst ); 381void x87_fnclex( struct x86_function *p ); 382void x87_fprndint( struct x86_function *p ); 383void x87_fpop( struct x86_function *p ); 384void x87_fscale( struct x86_function *p ); 385void x87_fsin( struct x86_function *p ); 386void x87_fsincos( struct x86_function *p ); 387void x87_fsqrt( struct x86_function *p ); 388void x87_fst( struct x86_function *p, struct x86_reg dst ); 389void x87_fstp( struct x86_function *p, struct x86_reg dst ); 390void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 391void x87_fsubp( struct x86_function *p, struct x86_reg dst ); 392void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 393void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); 394void x87_ftst( struct x86_function *p ); 395void x87_fxch( struct x86_function *p, struct x86_reg dst ); 396void x87_fxtract( struct x86_function *p ); 397void x87_fyl2x( struct x86_function *p ); 398void x87_fyl2xp1( struct x86_function *p ); 399void x87_fwait( struct x86_function *p ); 400void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); 401void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); 402void x87_fucompp( struct x86_function *p ); 403void x87_fucomp( struct x86_function *p, struct x86_reg arg ); 404void x87_fucom( struct x86_function *p, struct x86_reg arg ); 405 406 407 408/* Retrieve a reference to one of the function arguments, taking into 409 * account any push/pop activity. Note - doesn't track explicit 410 * manipulation of ESP by other instructions. 411 */ 412struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); 413 414#endif 415#endif 416