19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* 29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL - Simple DirectMedia Layer 39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Copyright (C) 1997-2012 Sam Lantinga 49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This library is free software; you can redistribute it and/or 69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall modify it under the terms of the GNU Lesser General Public 79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall License as published by the Free Software Foundation; either 89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall version 2.1 of the License, or (at your option) any later version. 99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This library is distributed in the hope that it will be useful, 119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall but WITHOUT ANY WARRANTY; without even the implied warranty of 129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Lesser General Public License for more details. 
149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall You should have received a copy of the GNU Lesser General Public 169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall License along with this library; if not, write to the Free Software 179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Sam Lantinga 209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall slouken@libsdl.org 219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_config.h" 239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* 259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples 269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr) 279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This code is licensed under the LGPL (see COPYING for details) 289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Assumes buffer size in bytes is a multiple of 16 309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Assumes SDL_MIX_MAXVOLUME = 128 319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*********************************************** 359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall* Mixing for 16 bit signed buffers 369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall***********************************************/ 379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 
#if defined(SDL_BUGGY_MMX_MIXERS) /* buggy, so we're disabling them. --ryan. */
#if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
/*
 * Mix signed 16-bit samples from src into dst using MMX.
 *
 *   dst    - destination buffer; each sample is replaced by
 *            saturate(dst + src * volume / 128)
 *   src    - source buffer, read only
 *   size   - buffer length in BYTES; only size / 16 whole groups of
 *            8 samples are processed, any remainder is left untouched
 *   volume - scale factor, replicated as a 16-bit word; the /128 step
 *            assumes SDL_MIX_MAXVOLUME == 128
 *
 * dst and src are advanced inside the asm, so they are declared as
 * read-write ("+r") operands: GCC forbids modifying input-only operands.
 * Numeric local labels are used so the asm stays valid if the compiler
 * inlines or clones this function (named labels would be defined twice).
 */
void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"    movl %3,%%eax\n"           /* eax = volume */

"    movl %2,%%edx\n"           /* edx = size */

"    shrl $4,%%edx\n"           /* process 16 bytes per iteration = 8 samples */

"    jz 2f\n"                   /* nothing to do; no MMX state touched yet */

"    pxor %%mm0,%%mm0\n"

"    movd %%eax,%%mm0\n"
"    movq %%mm0,%%mm1\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"         /* mm0 = vol|vol|vol|vol */

"    .align 8\n"
"1:\n"                          /* mixing loop */

"    movq (%1),%%mm1\n"         /* mm1 = a|b|c|d */

"    movq %%mm1,%%mm2\n"        /* mm2 = a|b|c|d */

"    movq 8(%1),%%mm4\n"        /* mm4 = e|f|g|h */

    /* preload the dst buffer into mm7 */
"    movq (%0),%%mm7\n"         /* mm7 = dst[0] */

    /* multiply by the volume */
"    pmullw %%mm0,%%mm1\n"      /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */

"    pmulhw %%mm0,%%mm2\n"      /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */
"    movq %%mm4,%%mm5\n"        /* mm5 = e|f|g|h */

"    pmullw %%mm0,%%mm4\n"      /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */

"    pmulhw %%mm0,%%mm5\n"      /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */
"    movq %%mm1,%%mm3\n"        /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */

"    punpckhwd %%mm2,%%mm1\n"   /* mm1 = a*v|b*v (32-bit products) */

"    movq %%mm4,%%mm6\n"        /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
"    punpcklwd %%mm2,%%mm3\n"   /* mm3 = c*v|d*v */

"    punpckhwd %%mm5,%%mm4\n"   /* mm4 = e*v|f*v */

"    punpcklwd %%mm5,%%mm6\n"   /* mm6 = g*v|h*v */

    /* preload the dst buffer into mm5 */
"    movq 8(%0),%%mm5\n"        /* mm5 = dst[1] */

    /* divide by 128 */
"    psrad $7,%%mm1\n"          /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */
"    add $16,%1\n"              /* src += 16 bytes */

"    psrad $7,%%mm3\n"          /* mm3 = c*v/128|d*v/128 */

"    psrad $7,%%mm4\n"          /* mm4 = e*v/128|f*v/128 */

    /* mm3 = the first four samples with the volume applied */
"    packssdw %%mm1,%%mm3\n"    /* mm3 = s(a*v|b*v|c*v|d*v) */

"    psrad $7,%%mm6\n"          /* mm6 = g*v/128|h*v/128 */
"    paddsw %%mm7,%%mm3\n"      /* mm3 = adjust_volume(src)+dst */

    /* mm6 = the last four samples with the volume applied */
"    packssdw %%mm4,%%mm6\n"    /* mm6 = s(e*v|f*v|g*v|h*v) */
"    movq %%mm3,(%0)\n"

"    paddsw %%mm5,%%mm6\n"      /* mm6 = adjust_volume(src)+dst */

"    movq %%mm6,8(%0)\n"

"    add $16,%0\n"              /* dst += 16 bytes */

"    dec %%edx\n"

"    jnz 1b\n"

"    emms\n"                    /* leave MMX state so x87 code works again */

"2:\n"
    : "+r" (dst), "+r" (src)
    : "m" (size), "m" (volume)
    : "eax", "edx",
      "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
      "cc", "memory"
    );
}
#endif /* __GNUC__ && __i386__ && SDL_ASSEMBLY_ROUTINES */
#endif /* SDL_BUGGY_MMX_MIXERS */



/*////////////////////////////////////////////// */
/* Mixing for 8 bit signed buffers               */
/*////////////////////////////////////////////// */

#if defined(SDL_BUGGY_MMX_MIXERS) /* buggy, so we're disabling them. --ryan. */
#if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
/*
 * Mix signed 8-bit samples from src into dst using MMX.
 *
 *   dst    - destination buffer; each sample is replaced by
 *            saturate(dst + src * volume / 128)
 *   src    - source buffer, read only
 *   size   - buffer length in bytes; only size / 8 whole groups of
 *            8 samples are processed, any remainder is left untouched
 *   volume - scale factor, replicated as a 16-bit word; the /128 step
 *            assumes SDL_MIX_MAXVOLUME == 128
 *
 * dst and src are advanced inside the asm, so they are declared as
 * read-write ("+r") operands: GCC forbids modifying input-only operands.
 * Numeric local labels are used so the asm stays valid if the compiler
 * inlines or clones this function (named labels would be defined twice).
 */
void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
{
    __asm__ __volatile__ (

"    movl %3,%%eax\n"           /* eax = volume */

"    movd %%eax,%%mm0\n"
"    movq %%mm0,%%mm1\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"
"    psllq $16,%%mm0\n"
"    por %%mm1,%%mm0\n"         /* mm0 = vol|vol|vol|vol */

"    movl %2,%%edx\n"           /* edx = size */
"    shr $3,%%edx\n"            /* process 8 bytes per iteration = 8 samples */

"    cmp $0,%%edx\n"
"    je 2f\n"

"    .align 8\n"
"1:\n"                          /* mixing loop */

"    pxor %%mm2,%%mm2\n"        /* mm2 = 0 */
"    movq (%1),%%mm1\n"         /* mm1 = a|b|c|d|e|f|g|h */

"    movq %%mm1,%%mm3\n"        /* mm3 = a|b|c|d|e|f|g|h */

    /* sign extension: compare 0 > sample, giving 0xFF for negative bytes
       and 0x00 otherwise; that mask becomes the high byte of each word */
"    pcmpgtb %%mm1,%%mm2\n"     /* mm2 = 11111111|00000000|00000000.... */

"    punpckhbw %%mm2,%%mm1\n"   /* mm1 = a|b|c|d sign-extended to 16 bits */

"    punpcklbw %%mm2,%%mm3\n"   /* mm3 = e|f|g|h sign-extended to 16 bits */
"    movq (%0),%%mm2\n"         /* mm2 = destination */

"    pmullw %%mm0,%%mm1\n"      /* mm1 = v*a|v*b|v*c|v*d */
"    add $8,%1\n"               /* src += 8 bytes */

"    pmullw %%mm0,%%mm3\n"      /* mm3 = v*e|v*f|v*g|v*h */
"    psraw $7,%%mm1\n"          /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */

"    psraw $7,%%mm3\n"          /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */

"    packsswb %%mm1,%%mm3\n"    /* mm3 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */

"    paddsb %%mm2,%%mm3\n"      /* add to destination buffer */

"    movq %%mm3,(%0)\n"         /* store back to ram */
"    add $8,%0\n"               /* dst += 8 bytes */

"    dec %%edx\n"

"    jnz 1b\n"

"2:\n"
"    emms\n"                    /* always runs: mm0/mm1 were used above */
    : "+r" (dst), "+r" (src)
    : "m" (size), "m" (volume)
    : "eax", "edx",
      "mm0", "mm1", "mm2", "mm3",
      "cc", "memory"
    );
}
#endif /* __GNUC__ && __i386__ && SDL_ASSEMBLY_ROUTINES */
#endif /* SDL_BUGGY_MMX_MIXERS */