19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL - Simple DirectMedia Layer
39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Copyright (C) 1997-2012 Sam Lantinga
49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    This library is free software; you can redistribute it and/or
69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    modify it under the terms of the GNU Lesser General Public
79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    License as published by the Free Software Foundation; either
89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    version 2.1 of the License, or (at your option) any later version.
99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    This library is distributed in the hope that it will be useful,
119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    but WITHOUT ANY WARRANTY; without even the implied warranty of
129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Lesser General Public License for more details.
149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    You should have received a copy of the GNU Lesser General Public
169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    License along with this library; if not, write to the Free Software
179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Sam Lantinga
209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    slouken@libsdl.org
219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_config.h"
239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    This code is licensed under the LGPL (see COPYING for details)
289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Assumes buffer size in bytes is a multiple of 16
309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Assumes SDL_MIX_MAXVOLUME = 128
319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/
329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/***********************************************
359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*   Mixing for 16 bit signed buffers
369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall***********************************************/
379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if defined(SDL_BUGGY_MMX_MIXERS) /* buggy, so we're disabling them. --ryan. */
399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
/*
 * Mix signed 16-bit little-endian samples from src into dst using MMX.
 *
 * For every sample:  dst = saturate16(dst + ((src * volume) >> 7))
 *
 *   dst    - destination buffer, read and updated in place
 *   src    - source buffer, only read
 *   size   - buffer length in BYTES; only whole 16-byte blocks (8 samples)
 *            are mixed, any trailing (size % 16) bytes are left untouched
 *   volume - gain; the product is divided by 128 (SDL_MIX_MAXVOLUME), so
 *            128 means unity gain.  The value is replicated into four
 *            16-bit lanes, so it must fit in 16 bits (0..128 is the
 *            intended range); larger or negative values spill into the
 *            neighbouring lanes.
 *
 * Inline-asm contract: dst and src are advanced inside the loop, so they
 * are declared as read-write ("+r") operands rather than inputs (GCC
 * assumes input operands are never modified).  All MMX registers and the
 * flags are listed as clobbers, and numeric local labels are used so the
 * statement can be duplicated/inlined without symbol clashes.
 */
void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"	movl %3,%%eax\n"	/* eax = volume */

"	movl %2,%%edx\n"	/* edx = size */

"	shrl $4,%%edx\n"	/* edx = number of 16-byte blocks (8 samples each) */

"	jz 2f\n"		/* no full block: leave before touching any MMX state */

	/* broadcast the volume into the four 16-bit lanes of mm0
	   (movd zero-extends, so the upper half of mm0 starts out clear) */
"	movd %%eax,%%mm0\n"
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */

".align 8\n"
"1:\n"

"	movq (%1),%%mm1\n" /* mm1 = a|b|c|d */

"	movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */

"	movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */

	/* preload the first half of the dst block into mm7 */
"	movq (%0),%%mm7\n" /* mm7 = dst[0] */

	/* multiply by the volume: low and high halves of each 32-bit product */
"	pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */

"	pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */
"	movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */

"	pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */

"	pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */
"	movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */

	/* interleave low/high halves to rebuild the full 32-bit products */
"	punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */

"	movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
"	punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */

"	punpckhwd %%mm5,%%mm4\n" /* mm4 = e*v|f*v */

"	punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */

	/* mm5 is free again: preload the second half of the dst block */
"	movq 8(%0),%%mm5\n" /* mm5 = dst[1] */

	/* divide by 128 (arithmetic shift keeps the sign) */
"	psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */
"	add $16,%1\n"		/* src += 16 bytes */

"	psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */

"	psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */

	/* mm3 = first four volume-adjusted samples, saturated to 16 bits */
"	packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */

"	psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */
"	paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst, saturating */

	/* mm6 = last four volume-adjusted samples, saturated to 16 bits */
"	packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */
"	movq %%mm3,(%0)\n"

"	paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst, saturating */

"	movq %%mm6,8(%0)\n"

"	add $16,%0\n"		/* dst += 16 bytes */

"	dec %%edx\n"

"	jnz 1b\n"

"	emms\n"			/* leave MMX state so x87 code can run again */

"2:\n"
	 : "+r" (dst), "+r"(src)
	 : "m"(size),
	 "m"(volume)
	 : "eax","edx","memory","cc",
	   "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"
	 );
}
1359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*////////////////////////////////////////////// */
1399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Mixing for 8 bit signed buffers */
1409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*////////////////////////////////////////////// */
1419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Mix signed 8-bit samples from src into dst using MMX.
 *
 * For every sample:  dst = saturate8(dst + ((src * volume) >> 7))
 *
 *   dst    - destination buffer, read and updated in place
 *   src    - source buffer, only read
 *   size   - buffer length in bytes; only whole 8-byte blocks (8 samples)
 *            are mixed, any trailing (size % 8) bytes are left untouched
 *   volume - gain; the product is divided by 128 (SDL_MIX_MAXVOLUME), so
 *            128 means unity gain.  Only the low 16 bits of each
 *            sample*volume product are kept (pmullw), so volume is
 *            expected to stay within 0..128.
 *
 * Inline-asm contract: dst and src are advanced inside the loop, so they
 * are declared as read-write ("+r") operands rather than inputs (GCC
 * assumes input operands are never modified).  All MMX registers used and
 * the flags are listed as clobbers, and numeric local labels are used so
 * the statement can be duplicated/inlined without symbol clashes.
 */
void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"	movl %3,%%eax\n"	/* eax = volume */

	/* broadcast the volume into the four 16-bit lanes of mm0 */
"	movd %%eax,%%mm0\n"
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */

"	movl %2,%%edx\n"	/* edx = size */
"	shrl $3,%%edx\n"	/* edx = number of 8-byte blocks (8 samples each) */

"	jz 2f\n"		/* shr already set ZF: no full block to mix */

".align 8\n"
"1:\n"

"	pxor %%mm2,%%mm2\n"		/* mm2 = 0 */
"	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d|e|f|g|h */

"	movq %%mm1,%%mm3\n" 	/* mm3 = a|b|c|d|e|f|g|h */

	/* build a sign mask by comparing 0 > sample: 0xFF for negative bytes,
	   0x00 otherwise; interleaving it with the samples sign-extends them
	   from 8 to 16 bits */
"	pcmpgtb %%mm1,%%mm2\n"	/* mm2 = per-byte sign mask */

"	punpckhbw %%mm2,%%mm1\n"	/* mm1 = a|b|c|d as sign-extended words */

"	punpcklbw %%mm2,%%mm3\n"	/* mm3 = e|f|g|h as sign-extended words */
"	movq (%0),%%mm2\n"	/* mm2 = destination */

"	pmullw %%mm0,%%mm1\n"	/* mm1 = v*a|v*b|v*c|v*d */
"	add $8,%1\n"		/* src += 8 bytes */

"	pmullw %%mm0,%%mm3\n"	/* mm3 = v*e|v*f|v*g|v*h */
"	psraw $7,%%mm1\n"		/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128  */

"	psraw $7,%%mm3\n"		/* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */

	/* pack both halves back to 8 signed bytes with saturation */
"	packsswb %%mm1,%%mm3\n"	/* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */

"	paddsb %%mm2,%%mm3\n"	/* saturating add to destination samples */

"	movq %%mm3,(%0)\n"	/* store back to ram */
"	add $8,%0\n"		/* dst += 8 bytes */

"	dec %%edx\n"

"	jnz 1b\n"

"2:\n"
"	emms\n"			/* mm0/mm1 were written even on the early exit */
	 : "+r" (dst), "+r"(src)
	 : "m"(size),
	 "m"(volume)
	 : "eax","edx","memory","cc",
	   "mm0","mm1","mm2","mm3"
	 );
}
2069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
2079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
208