19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*	mmx.h
29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	MultiMedia eXtensions GCC interface library for IA32.
49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	To use this library, simply include this header file
69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	and compile with GCC.  You MUST have inlining enabled
79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	in order for mmx_ok() to work; this can be done by
89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	simply using -O on the GCC command line.
99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Compiling with -DMMX_TRACE will cause detailed trace
	output to be printed with printf (i.e. to stdout) for each mmx operation.
129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	This adds lots of code, and obviously slows execution to
139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	a crawl, but can be very useful for debugging.
149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	AND FITNESS FOR ANY PARTICULAR PURPOSE.
199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	1997-99 by H. Dietz and R. Fisher
219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Notes:
239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	It appears that the latest gas has the pand problem fixed, therefore
249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	  I'll undefine BROKEN_PAND by default.
259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifndef _MMX_H
289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define _MMX_H
299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*	Warning:  at this writing, the version of GAS packaged
329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	with most Linux distributions does not handle the
339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	parallel AND operation mnemonic correctly.  If the
349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	symbol BROKEN_PAND is defined, a slower alternative
359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	coding will be used.  If execution of mmxtest results
369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	in an illegal instruction fault, define this symbol.
379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef	BROKEN_PAND
399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	The type of a value that fits in one MMX register: 64 bits that
	can be viewed as integer lanes of each width (signed or unsigned)
	or as two floats.
	(Note that long long constants MUST carry an LL suffix, and
	 unsigned long long constants a ULL suffix, lest the compiler
	 truncate them.)
*/
typedef	union {
	long long		q;	/* the whole 64 bits, signed (first member) */
	unsigned long long	uq;	/* the whole 64 bits, unsigned */
	int			d[2];	/* two signed 32-bit doublewords */
	unsigned int		ud[2];	/* two unsigned 32-bit doublewords */
	short			w[4];	/* four signed 16-bit words */
	unsigned short		uw[4];	/* four unsigned 16-bit words */
	char			b[8];	/* eight bytes (plain char) */
	unsigned char		ub[8];	/* eight unsigned bytes */
	float			s[2];	/* two single-precision floats */
} __attribute__ ((aligned (8))) mmx_t;	/* forced onto an 8-byte boundary */
579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if 0
/*	Helper: run CPUID for `leaf` and store eax, ebx, ecx, edx in
	regs[0..3].  On a non-x86 target all four are set to zero.
	NOTE(review): very old GCC releases could not allocate ebx in
	i386 PIC code -- confirm against the compilers this must support.
*/
static inline void
mm_cpuid(unsigned int leaf, unsigned int regs[4])
{
#if defined(__i386__) || defined(__x86_64__)
	__asm__ __volatile__ ("cpuid"
			      : "=a" (regs[0]), "=b" (regs[1]),
				"=c" (regs[2]), "=d" (regs[3])
			      : "0" (leaf));
#else
	(void)leaf;
	regs[0] = regs[1] = regs[2] = regs[3] = 0;
#endif
}

/*	Helper: returns non-zero if the CPUID instruction can be used.
	On i386 this toggles the ID bit (0x200000) in EFLAGS and checks
	whether the change sticks; x86-64 is treated as always having
	CPUID; any other target reports 0.
*/
static inline int
mm_have_cpuid(void)
{
#if defined(__x86_64__)
	return 1;
#elif defined(__i386__)
	unsigned int before, after;

	__asm__ __volatile__ (
		/* Copy EFLAGS, flip the ID bit in one copy, write it back */
		"pushfl\n\t"
		"popl %0\n\t"
		"movl %0, %1\n\t"
		"xorl $0x200000, %0\n\t"
		"pushl %0\n\t"
		"popfl\n\t"
		/* Read EFLAGS again to see whether the bit really changed */
		"pushfl\n\t"
		"popl %0\n\t"
		: "=&r" (after), "=&r" (before)
		: /* no input */
		: "cc");
	return ((before ^ after) & 0x200000) != 0;
#else
	return 0;
#endif
}

/*	Function to test if multimedia instructions are supported...

	Returns 1 if MMX instructions are supported,
		3 if Cyrix MMX and Extended MMX instructions are supported
		5 if AMD MMX and 3DNow! instructions are supported
		0 if hardware does not support any of these
		  (also returned for any vendor other than Intel, AMD, Cyrix)

	The earlier single-asm version could not be built as written:
	its "movl $N, %0:" lines carried a stray ':', "eax" was both the
	output and a clobber, and its global labels would collide if the
	function were inlined twice.  The vendor dispatch now lives in C.
*/
inline extern int
mm_support(void)
{
	unsigned int r[4];
	unsigned int max_std_level;

	if (!mm_have_cpuid())
		return 0;			/* CPUID not supported */

	/* Leaf 0: eax = highest standard leaf, ebx/edx/ecx = vendor string */
	mm_cpuid(0, r);
	max_std_level = r[0];

	if (r[1] == 0x756e6547 && r[3] == 0x49656e69 && r[2] == 0x6c65746e) {
		/* "GenuineIntel": only the standard feature test applies */
	} else if (r[1] == 0x68747541 && r[3] == 0x69746e65 &&
		   r[2] == 0x444d4163) {
		/* "AuthenticAMD": use extended leaf 0x80000001 if it exists.
		   The comparison must be unsigned; the old signed "jl"
		   against 0x80000000 could never take the fallback. */
		mm_cpuid(0x80000000u, r);
		if (r[0] >= 0x80000001u) {
			mm_cpuid(0x80000001u, r);
			if (!(r[3] & 0x00800000u))
				return 0;	/* MMX not supported */
			return (r[3] & 0x80000000u) ? 5 : 1;
		}
	} else if (r[1] == 0x69727943 && r[3] == 0x736e4978 &&
		   r[2] == 0x64616574) {
		/* "CyrixInstead": the value of CPUID/80000001 for the 6x86MX
		   is undefined according to the Cyrix CPU Detection Guide
		   (Preliminary Rev. 1.01 table 1), so extended CPUID is only
		   trusted when the highest standard leaf is exactly 2. */
		if (max_std_level == 2) {
			mm_cpuid(0x80000001u, r);
			/* NOTE(review): the feature bits are read from eax,
			   exactly as the original asm did; extended feature
			   flags normally come back in edx -- confirm. */
			if (!(r[0] & 0x00800000u))
				return 0;	/* MMX not supported */
			return (r[0] & 0x01000000u) ? 3 : 1;
		}
	} else {
		return 0;			/* unknown vendor */
	}

	/* Standard feature test: CPUID leaf 1, edx bit 23 = MMX */
	mm_cpuid(1, r);
	return (r[3] & 0x00800000u) ? 1 : 0;
}
2189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Function to test if mmx instructions are supported...
	Reports only bit 0 of the mm_support() result.
*/
inline extern int
mmx_ok(void)
{
	/* 1 if MMX instructions are supported, 0 otherwise */
	const int caps = mm_support();

	return ( caps & 0x1 );
}
2279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
2289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
2299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*	Helper functions for the instruction macros that follow...
2309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	(note that memory-to-register, m2r, instructions are nearly
2319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 as efficient as register-to-register, r2r, instructions;
2329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 however, memory-to-memory instructions are really simulated
2339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 as a convenience, and are only 1/3 as efficient)
2349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
2359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifdef	MMX_TRACE
2369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Tracing versions: each macro prints its operands and its result
	with printf (stdout).  This header does not include <stdio.h>
	itself, so the including file must do so.
*/
2399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Traced immediate-to-register operation: prints `imm` and the
	current contents of `reg`, executes "op imm, reg", then prints
	`reg` again.  Each 64-bit value is printed as ud[1] followed by
	ud[0].
	Wrapped in do { } while (0) so that the macro behaves as a single
	statement (e.g. between `if` and `else`), like the non-trace form.
*/
#define	mmx_i2r(op, imm, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace.uq = (imm); \
		printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#reg "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "y" (imm)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#reg "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
2609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Traced memory-to-register operation: prints `mem` and the current
	contents of `reg`, executes "op mem, reg", then prints `reg`
	again.  `mem` is copied into an mmx_t for printing, so it must be
	an mmx_t here.  Each 64-bit value is printed as ud[1] followed by
	ud[0].
	Wrapped in do { } while (0) so that the macro behaves as a single
	statement (e.g. between `if` and `else`), like the non-trace form.
*/
#define	mmx_m2r(op, mem, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mem); \
		printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#reg "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "y" (mem)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#reg "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
2819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Traced register-to-memory operation: prints `reg` and the current
	value of `mem`, executes "op reg, mem", then prints `mem` again.
	`mem` is copied into an mmx_t for printing, so it must be an
	mmx_t here.  Each 64-bit value is printed as ud[1] followed by
	ud[0].
	Wrapped in do { } while (0) so that the macro behaves as a single
	statement (e.g. between `if` and `else`), like the non-trace form.
*/
#define	mmx_r2m(op, reg, mem) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (mem); \
		printf(#mem "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %%" #reg ", %0" \
				      : "=y" (mem) \
				      : /* nothing */ ); \
		mmx_trace = (mem); \
		printf(#mem "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
3009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Traced register-to-register operation: prints `regs` and `regd`,
	executes "op regs, regd", then prints `regd` again.  Each 64-bit
	value is printed as ud[1] followed by ud[0].
	Wrapped in do { } while (0) so that the macro behaves as a single
	statement (e.g. between `if` and `else`), like the non-trace form.
*/
#define	mmx_r2r(op, regs, regd) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #regs ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#regd "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		printf(#regd "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
3219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Traced memory-to-memory operation: prints `mems` and `memd`,
	computes memd = memd op mems through mm0, then prints `memd`
	again.  Both operands are copied into an mmx_t for printing, so
	they must be mmx_t here.  Each 64-bit value is printed as ud[1]
	followed by ud[0].
	`memd` is read before it is written, so it is a "+y" (read-write)
	operand, and mm0 is declared as clobbered so the compiler neither
	keeps a value in it nor assigns it to an operand.
	Wrapped in do { } while (0) so that the macro behaves as a single
	statement (e.g. between `if` and `else`), like the non-trace form.
*/
#define	mmx_m2m(op, mems, memd) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mems); \
		printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (memd); \
		printf(#memd "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
				      #op " %1, %%mm0\n\t" \
				      "movq %%mm0, %0" \
				      : "+y" (memd) \
				      : "y" (mems) \
				      : "mm0"); \
		mmx_trace = (memd); \
		printf(#memd "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
3409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else
3429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*	These macros are a lot simpler without the tracing...
3449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
3459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	"Immediate"-to-register: the value is handed to the asm as a "y"
	(MMX register) input and `op` is applied from it into `dst`.
*/
#define	mmx_i2r(op, val, dst) \
	__asm__ __volatile__ ( \
		#op " %0, %%" #dst \
		: /* no outputs */ \
		: "y" (val))
3509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Memory-to-register: applies `op` with `src` as an "m" (memory)
	input operand and the named MMX register as destination.
*/
#define	mmx_m2r(op, src, dst) \
	__asm__ __volatile__ ( \
		#op " %0, %%" #dst \
		: /* no outputs */ \
		: "m" (src))
3559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Register-to-memory: applies `op` from the named MMX register into
	`dst`, which is an "=m" (memory, write-only) output operand.
*/
#define	mmx_r2m(op, src, dst) \
	__asm__ __volatile__ ( \
		#op " %%" #src ", %0" \
		: "=m" (dst) \
		: /* no inputs */ )
3609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Register-to-register: emits "op %src, %dst" as an operand-less
	asm statement, so the register names are pasted in literally.
*/
#define	mmx_r2r(op, src, dst) \
	__asm__ __volatile__ ( \
		#op " %" #src ", %" #dst)
3639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	Simulated memory-to-memory operation: memd = memd op mems,
	computed through mm0.
	`memd` is loaded before it is stored, so it must be a "+y"
	(read-write) operand rather than "=y" (write-only); mm0 is used
	as scratch and is therefore listed as clobbered, which also keeps
	the compiler from assigning it to either operand.
*/
#define	mmx_m2m(op, mems, memd) \
	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
			      #op " %1, %%mm0\n\t" \
			      "movq %%mm0, %0" \
			      : "+y" (memd) \
			      : "y" (mems) \
			      : "mm0")
3709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
3729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	1x64 MOVe Quadword
	(this is both a load and a store...
	 in fact, it is the only way to store)

	movq(vars, vard) copies vars to vard through mm0; mm0 is listed
	as clobbered so the compiler knows its previous contents are lost
	and never assigns it to one of the operands.
*/
#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
#define	movq(vars, vard) \
	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
			      "movq %%mm0, %0" \
			      : "=y" (vard) \
			      : "y" (vars) \
			      : "mm0")
3869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*	1x32 MOVe Doubleword
3899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	(like movq, this is both load and store...
3909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 but is most useful for moving things between
3919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 mmx registers and ordinary registers)
3929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
3939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
3949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
3959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
/*	movd(vars, vard): copy 32 bits from the variable vars to the
	variable vard, staging the value through %mm0.

	Both operands are memory operands ("m"): MOVD has no
	MMX-register-to-MMX-register form, so forcing the operands into
	MMX registers (constraint "y") yields "movd %mmN, %mm0", which
	does not assemble.  vars and vard must therefore be addressable
	objects; only their low 32 bits are read / written.
	%mm0 is overwritten and is declared as a clobber.
*/
#define	movd(vars, vard) \
	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
			      "movd %%mm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "mm0")
4019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PADDD / PADDW / PADDB -- parallel add with wrap-around on
	2x32, 4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable added into an MMX register
	  *_r2r: MMX register added into an MMX register
	  plain: variable added into a variable (via mmx_m2m)
*/
#define paddd_m2r(mem, reg)	mmx_m2r(paddd, mem, reg)
#define paddd_r2r(src, dst)	mmx_r2r(paddd, src, dst)
#define paddd(msrc, mdst)	mmx_m2m(paddd, msrc, mdst)

#define paddw_m2r(mem, reg)	mmx_m2r(paddw, mem, reg)
#define paddw_r2r(src, dst)	mmx_r2r(paddw, src, dst)
#define paddw(msrc, mdst)	mmx_m2m(paddw, msrc, mdst)

#define paddb_m2r(mem, reg)	mmx_m2r(paddb, mem, reg)
#define paddb_r2r(src, dst)	mmx_r2r(paddb, src, dst)
#define paddb(msrc, mdst)	mmx_m2m(paddb, msrc, mdst)
4169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PADDSW / PADDSB -- parallel add with signed saturation on
	4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable added into an MMX register
	  *_r2r: MMX register added into an MMX register
	  plain: variable added into a variable (via mmx_m2m)
*/
#define paddsw_m2r(mem, reg)	mmx_m2r(paddsw, mem, reg)
#define paddsw_r2r(src, dst)	mmx_r2r(paddsw, src, dst)
#define paddsw(msrc, mdst)	mmx_m2m(paddsw, msrc, mdst)

#define paddsb_m2r(mem, reg)	mmx_m2r(paddsb, mem, reg)
#define paddsb_r2r(src, dst)	mmx_r2r(paddsb, src, dst)
#define paddsb(msrc, mdst)	mmx_m2m(paddsb, msrc, mdst)
4279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PADDUSW / PADDUSB -- parallel add with unsigned saturation on
	4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable added into an MMX register
	  *_r2r: MMX register added into an MMX register
	  plain: variable added into a variable (via mmx_m2m)
*/
#define paddusw_m2r(mem, reg)	mmx_m2r(paddusw, mem, reg)
#define paddusw_r2r(src, dst)	mmx_r2r(paddusw, src, dst)
#define paddusw(msrc, mdst)	mmx_m2m(paddusw, msrc, mdst)

#define paddusb_m2r(mem, reg)	mmx_m2r(paddusb, mem, reg)
#define paddusb_r2r(src, dst)	mmx_r2r(paddusb, src, dst)
#define paddusb(msrc, mdst)	mmx_m2m(paddusb, msrc, mdst)
4389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSUBD / PSUBW / PSUBB -- parallel subtract with wrap-around on
	2x32, 4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable subtracted from an MMX register
	  *_r2r: MMX register subtracted from an MMX register
	  plain: variable subtracted from a variable (via mmx_m2m)
*/
#define psubd_m2r(mem, reg)	mmx_m2r(psubd, mem, reg)
#define psubd_r2r(src, dst)	mmx_r2r(psubd, src, dst)
#define psubd(msrc, mdst)	mmx_m2m(psubd, msrc, mdst)

#define psubw_m2r(mem, reg)	mmx_m2r(psubw, mem, reg)
#define psubw_r2r(src, dst)	mmx_r2r(psubw, src, dst)
#define psubw(msrc, mdst)	mmx_m2m(psubw, msrc, mdst)

#define psubb_m2r(mem, reg)	mmx_m2r(psubb, mem, reg)
#define psubb_r2r(src, dst)	mmx_r2r(psubb, src, dst)
#define psubb(msrc, mdst)	mmx_m2m(psubb, msrc, mdst)
4539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSUBSW / PSUBSB -- parallel subtract with signed saturation on
	4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable subtracted from an MMX register
	  *_r2r: MMX register subtracted from an MMX register
	  plain: variable subtracted from a variable (via mmx_m2m)
*/
#define psubsw_m2r(mem, reg)	mmx_m2r(psubsw, mem, reg)
#define psubsw_r2r(src, dst)	mmx_r2r(psubsw, src, dst)
#define psubsw(msrc, mdst)	mmx_m2m(psubsw, msrc, mdst)

#define psubsb_m2r(mem, reg)	mmx_m2r(psubsb, mem, reg)
#define psubsb_r2r(src, dst)	mmx_r2r(psubsb, src, dst)
#define psubsb(msrc, mdst)	mmx_m2m(psubsb, msrc, mdst)
4649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSUBUSW / PSUBUSB -- parallel subtract with unsigned saturation
	on 4x16 and 8x8 packed fields respectively.
	  *_m2r: memory variable subtracted from an MMX register
	  *_r2r: MMX register subtracted from an MMX register
	  plain: variable subtracted from a variable (via mmx_m2m)
*/
#define psubusw_m2r(mem, reg)	mmx_m2r(psubusw, mem, reg)
#define psubusw_r2r(src, dst)	mmx_r2r(psubusw, src, dst)
#define psubusw(msrc, mdst)	mmx_m2m(psubusw, msrc, mdst)

#define psubusb_m2r(mem, reg)	mmx_m2r(psubusb, mem, reg)
#define psubusb_r2r(src, dst)	mmx_r2r(psubusb, src, dst)
#define psubusb(msrc, mdst)	mmx_m2m(psubusb, msrc, mdst)
4759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PMULLW -- 4x16 parallel multiply; each result field keeps the
	low 16 bits of its product.
*/
#define pmullw_m2r(mem, reg)	mmx_m2r(pmullw, mem, reg)
#define pmullw_r2r(src, dst)	mmx_r2r(pmullw, src, dst)
#define pmullw(msrc, mdst)	mmx_m2m(pmullw, msrc, mdst)
4829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PMULHW -- 4x16 parallel multiply; each result field keeps the
	high 16 bits of its product.
*/
#define pmulhw_m2r(mem, reg)	mmx_m2r(pmulhw, mem, reg)
#define pmulhw_r2r(src, dst)	mmx_r2r(pmulhw, src, dst)
#define pmulhw(msrc, mdst)	mmx_m2m(pmulhw, msrc, mdst)
4899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PMADDWD -- 4x16 -> 2x32 parallel multiply-add.
	The four signed 16-bit fields are multiplied pairwise into
	32-bit products, then each pair of adjacent products is summed
	to give the two 32-bit result fields.
*/
#define pmaddwd_m2r(mem, reg)	mmx_m2r(pmaddwd, mem, reg)
#define pmaddwd_r2r(src, dst)	mmx_r2r(pmaddwd, src, dst)
#define pmaddwd(msrc, mdst)	mmx_m2m(pmaddwd, msrc, mdst)
4989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PAND -- 1x64 bitwise AND.

	When BROKEN_PAND is defined the pand mnemonic is never emitted;
	AND is synthesised from two pandn operations instead.  pandn
	computes dest = (NOT dest) AND source, therefore
	    pandn all-ones, dest   ->  dest = NOT dest
	    pandn src, dest        ->  dest = (original dest) AND src
	The memory-to-memory form stages the destination through mm0.

	Every statement inside the brace blocks is terminated with ';'
	so the macros stay valid whether the mmx_* helpers expand to a
	bare asm statement or to a brace block of their own.
*/
#ifdef	BROKEN_PAND
#define	pand_m2r(var, reg) \
	{ \
		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
		mmx_m2r(pandn, var, reg); \
	}
#define	pand_r2r(regs, regd) \
	{ \
		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
		mmx_r2r(pandn, regs, regd); \
	}
#define	pand(vars, vard) \
	{ \
		movq_m2r(vard, mm0); \
		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
		mmx_m2r(pandn, vars, mm0); \
		movq_r2m(mm0, vard); \
	}
#else
#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
#endif
5259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PANDN -- 1x64 bitwise AND-NOT: the destination is inverted
	first and then ANDed with the source.
*/
#define pandn_m2r(mem, reg)	mmx_m2r(pandn, mem, reg)
#define pandn_r2r(src, dst)	mmx_r2r(pandn, src, dst)
#define pandn(msrc, mdst)	mmx_m2m(pandn, msrc, mdst)
5329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	POR -- 1x64 bitwise inclusive OR.
*/
#define por_m2r(mem, reg)	mmx_m2r(por, mem, reg)
#define por_r2r(src, dst)	mmx_r2r(por, src, dst)
#define por(msrc, mdst)		mmx_m2m(por, msrc, mdst)
5399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PXOR -- 1x64 bitwise exclusive OR.
*/
#define pxor_m2r(mem, reg)	mmx_m2r(pxor, mem, reg)
#define pxor_r2r(src, dst)	mmx_r2r(pxor, src, dst)
#define pxor(msrc, mdst)	mmx_m2m(pxor, msrc, mdst)
5469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PCMPEQD / PCMPEQW / PCMPEQB -- parallel compare for equality on
	2x32, 4x16 and 8x8 packed fields respectively.
	Every result field is either 0 (not equal) or -1 (equal).
*/
#define pcmpeqd_m2r(mem, reg)	mmx_m2r(pcmpeqd, mem, reg)
#define pcmpeqd_r2r(src, dst)	mmx_r2r(pcmpeqd, src, dst)
#define pcmpeqd(msrc, mdst)	mmx_m2m(pcmpeqd, msrc, mdst)

#define pcmpeqw_m2r(mem, reg)	mmx_m2r(pcmpeqw, mem, reg)
#define pcmpeqw_r2r(src, dst)	mmx_r2r(pcmpeqw, src, dst)
#define pcmpeqw(msrc, mdst)	mmx_m2m(pcmpeqw, msrc, mdst)

#define pcmpeqb_m2r(mem, reg)	mmx_m2r(pcmpeqb, mem, reg)
#define pcmpeqb_r2r(src, dst)	mmx_r2r(pcmpeqb, src, dst)
#define pcmpeqb(msrc, mdst)	mmx_m2m(pcmpeqb, msrc, mdst)
5629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PCMPGTD / PCMPGTW / PCMPGTB -- parallel compare for greater-than
	on 2x32, 4x16 and 8x8 packed fields respectively.
	Every result field is either 0 (false) or -1 (true).
*/
#define pcmpgtd_m2r(mem, reg)	mmx_m2r(pcmpgtd, mem, reg)
#define pcmpgtd_r2r(src, dst)	mmx_r2r(pcmpgtd, src, dst)
#define pcmpgtd(msrc, mdst)	mmx_m2m(pcmpgtd, msrc, mdst)

#define pcmpgtw_m2r(mem, reg)	mmx_m2r(pcmpgtw, mem, reg)
#define pcmpgtw_r2r(src, dst)	mmx_r2r(pcmpgtw, src, dst)
#define pcmpgtw(msrc, mdst)	mmx_m2m(pcmpgtw, msrc, mdst)

#define pcmpgtb_m2r(mem, reg)	mmx_m2r(pcmpgtb, mem, reg)
#define pcmpgtb_r2r(src, dst)	mmx_r2r(pcmpgtb, src, dst)
#define pcmpgtb(msrc, mdst)	mmx_m2m(pcmpgtb, msrc, mdst)
5789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSLLQ / PSLLD / PSLLW -- parallel logical shift left on
	1x64, 2x32 and 4x16 packed fields respectively.
	  *_i2r: shift count given as an immediate
	  *_m2r: shift count read from a memory variable
	  *_r2r: shift count read from an MMX register
	  plain: variable shifted by a variable (via mmx_m2m)
*/
#define psllq_i2r(count, reg)	mmx_i2r(psllq, count, reg)
#define psllq_m2r(mem, reg)	mmx_m2r(psllq, mem, reg)
#define psllq_r2r(src, dst)	mmx_r2r(psllq, src, dst)
#define psllq(msrc, mdst)	mmx_m2m(psllq, msrc, mdst)

#define pslld_i2r(count, reg)	mmx_i2r(pslld, count, reg)
#define pslld_m2r(mem, reg)	mmx_m2r(pslld, mem, reg)
#define pslld_r2r(src, dst)	mmx_r2r(pslld, src, dst)
#define pslld(msrc, mdst)	mmx_m2m(pslld, msrc, mdst)

#define psllw_i2r(count, reg)	mmx_i2r(psllw, count, reg)
#define psllw_m2r(mem, reg)	mmx_m2r(psllw, mem, reg)
#define psllw_r2r(src, dst)	mmx_r2r(psllw, src, dst)
#define psllw(msrc, mdst)	mmx_m2m(psllw, msrc, mdst)
5969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
5979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSRLQ / PSRLD / PSRLW -- parallel logical shift right on
	1x64, 2x32 and 4x16 packed fields respectively.
	  *_i2r: shift count given as an immediate
	  *_m2r: shift count read from a memory variable
	  *_r2r: shift count read from an MMX register
	  plain: variable shifted by a variable (via mmx_m2m)
*/
#define psrlq_i2r(count, reg)	mmx_i2r(psrlq, count, reg)
#define psrlq_m2r(mem, reg)	mmx_m2r(psrlq, mem, reg)
#define psrlq_r2r(src, dst)	mmx_r2r(psrlq, src, dst)
#define psrlq(msrc, mdst)	mmx_m2m(psrlq, msrc, mdst)

#define psrld_i2r(count, reg)	mmx_i2r(psrld, count, reg)
#define psrld_m2r(mem, reg)	mmx_m2r(psrld, mem, reg)
#define psrld_r2r(src, dst)	mmx_r2r(psrld, src, dst)
#define psrld(msrc, mdst)	mmx_m2m(psrld, msrc, mdst)

#define psrlw_i2r(count, reg)	mmx_i2r(psrlw, count, reg)
#define psrlw_m2r(mem, reg)	mmx_m2r(psrlw, mem, reg)
#define psrlw_r2r(src, dst)	mmx_r2r(psrlw, src, dst)
#define psrlw(msrc, mdst)	mmx_m2m(psrlw, msrc, mdst)
6149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PSRAD / PSRAW -- parallel arithmetic shift right on 2x32 and
	4x16 packed fields respectively.
	  *_i2r: shift count given as an immediate
	  *_m2r: shift count read from a memory variable
	  *_r2r: shift count read from an MMX register
	  plain: variable shifted by a variable (via mmx_m2m)
*/
#define psrad_i2r(count, reg)	mmx_i2r(psrad, count, reg)
#define psrad_m2r(mem, reg)	mmx_m2r(psrad, mem, reg)
#define psrad_r2r(src, dst)	mmx_r2r(psrad, src, dst)
#define psrad(msrc, mdst)	mmx_m2m(psrad, msrc, mdst)

#define psraw_i2r(count, reg)	mmx_i2r(psraw, count, reg)
#define psraw_m2r(mem, reg)	mmx_m2r(psraw, mem, reg)
#define psraw_r2r(src, dst)	mmx_r2r(psraw, src, dst)
#define psraw(msrc, mdst)	mmx_m2m(psraw, msrc, mdst)
6279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PACKSSDW / PACKSSWB -- pack with signed saturation,
	2x32 -> 4x16 and 4x16 -> 8x8 respectively.
	Fields of both operands are narrowed and written to the
	destination: the destination's own fields land in the low half
	of the result, the source's fields in the high half.
*/
#define packssdw_m2r(mem, reg)	mmx_m2r(packssdw, mem, reg)
#define packssdw_r2r(src, dst)	mmx_r2r(packssdw, src, dst)
#define packssdw(msrc, mdst)	mmx_m2m(packssdw, msrc, mdst)

#define packsswb_m2r(mem, reg)	mmx_m2r(packsswb, mem, reg)
#define packsswb_r2r(src, dst)	mmx_r2r(packsswb, src, dst)
#define packsswb(msrc, mdst)	mmx_m2m(packsswb, msrc, mdst)
6399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PACKUSWB -- pack 4x16 -> 8x8 with unsigned saturation.
	Fields of both operands are narrowed and written to the
	destination: the destination's own fields land in the low half
	of the result, the source's fields in the high half.
*/
#define packuswb_m2r(mem, reg)	mmx_m2r(packuswb, mem, reg)
#define packuswb_r2r(src, dst)	mmx_r2r(packuswb, src, dst)
#define packuswb(msrc, mdst)	mmx_m2m(packuswb, msrc, mdst)
6479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PUNPCKLDQ / PUNPCKLWD / PUNPCKLBW -- unpack low,
	2x32 -> 1x64, 4x16 -> 2x32 and 8x8 -> 4x16 respectively.
	The low half of the destination is interleaved with the low
	half of the source; the source fields act as the padding of
	each widened result field.
*/
#define punpckldq_m2r(mem, reg)	mmx_m2r(punpckldq, mem, reg)
#define punpckldq_r2r(src, dst)	mmx_r2r(punpckldq, src, dst)
#define punpckldq(msrc, mdst)	mmx_m2m(punpckldq, msrc, mdst)

#define punpcklwd_m2r(mem, reg)	mmx_m2r(punpcklwd, mem, reg)
#define punpcklwd_r2r(src, dst)	mmx_r2r(punpcklwd, src, dst)
#define punpcklwd(msrc, mdst)	mmx_m2m(punpcklwd, msrc, mdst)

#define punpcklbw_m2r(mem, reg)	mmx_m2r(punpcklbw, mem, reg)
#define punpcklbw_r2r(src, dst)	mmx_r2r(punpcklbw, src, dst)
#define punpcklbw(msrc, mdst)	mmx_m2m(punpcklbw, msrc, mdst)
6649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	PUNPCKHDQ / PUNPCKHWD / PUNPCKHBW -- unpack high,
	2x32 -> 1x64, 4x16 -> 2x32 and 8x8 -> 4x16 respectively.
	The high half of the destination is interleaved with the high
	half of the source; the source fields act as the padding of
	each widened result field.
*/
#define punpckhdq_m2r(mem, reg)	mmx_m2r(punpckhdq, mem, reg)
#define punpckhdq_r2r(src, dst)	mmx_r2r(punpckhdq, src, dst)
#define punpckhdq(msrc, mdst)	mmx_m2m(punpckhdq, msrc, mdst)

#define punpckhwd_m2r(mem, reg)	mmx_m2r(punpckhwd, mem, reg)
#define punpckhwd_r2r(src, dst)	mmx_r2r(punpckhwd, src, dst)
#define punpckhwd(msrc, mdst)	mmx_m2m(punpckhwd, msrc, mdst)

#define punpckhbw_m2r(mem, reg)	mmx_m2r(punpckhbw, mem, reg)
#define punpckhbw_r2r(src, dst)	mmx_r2r(punpckhbw, src, dst)
#define punpckhbw(msrc, mdst)	mmx_m2m(punpckhbw, msrc, mdst)
6819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
6829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*	EMMS -- Empty MMx State.
	Must be issued when switching from MMX use to floating-point
	use of the registers the two share.  Nothing is needed in the
	other direction (float to MMX), because only the float tag word
	information can be corrupted.
	With MMX_TRACE defined, a trace line is printed before the
	instruction is executed.
*/
#ifndef	MMX_TRACE

#define	emms()	__asm__ __volatile__ ("emms")

#else

#define	emms() \
	{ \
		printf("emms()\n"); \
		__asm__ __volatile__ ("emms"); \
	}

#endif
7029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
7039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
7049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
705