17cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde/* 233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Copyright 2011 The LibYuv Project Authors. All rights reserved. 37cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * 47cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * Use of this source code is governed by a BSD-style license 57cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * that can be found in the LICENSE file in the root of the source 67cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * tree. An additional intellectual property rights grant can be found 77cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * in the file PATENTS. All contributing project authors may 87cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde * be found in the AUTHORS file in the root of the source tree. 97cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde */ 107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 1133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/row.h" 127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 1333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/basic_types.h" 1433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 1633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampnamespace libyuv { 177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordeextern "C" { 1833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 1933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// This module is for GCC x86 and x64 2133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) 2233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// GCC 4.2 on OSX has link error when passing static or const to inline. 
2433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// TODO(fbarchard): Use static const when gcc 4.2 support is dropped. 2533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __APPLE__ 2633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define CONST 2733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#else 2833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define CONST static const 2933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 317cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#ifdef HAS_ARGBTOYROW_SSSE3 327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 3333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Constants for ARGB 3433cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kARGBToY = { 3533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 3633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 3733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3833cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kARGBToU = { 3933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 4033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 4133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 4233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kARGBToV = { 4333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, 4433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 4533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 4633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Constants for BGRA 4733cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kBGRAToY = { 4833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 
65, 13, 0, 33, 65, 13 4933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 5033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 5133cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kBGRAToU = { 5233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 5333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 5433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 5533cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kBGRAToV = { 5633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 5733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 5833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 5933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Constants for ABGR 6033cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kABGRToY = { 6133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 6233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 6333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 6433cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kABGRToU = { 6533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 6633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 6733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 6833cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST vec8 kABGRToV = { 6933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 7033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 7133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 7233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kAddY16 = { 
7333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u 747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde}; 757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 7633cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kAddUV128 = { 7733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 7833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u 797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde}; 807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 8133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting RGB24 to ARGB. 8233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskRGB24ToARGB = { 837cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u 847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde}; 857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde// Shuffle table for converting RAW to ARGB. 8733cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskRAWToARGB = { 887cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u 897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde}; 907cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 9133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting ABGR to ARGB. 
9233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskABGRToARGB = { 9333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u 9433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 9533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 9633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting BGRA to ARGB. 9733cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskBGRAToARGB = { 9833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u 9933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 10033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 10133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting RGBA to ARGB. 10233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskRGBAToARGB = { 10333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u 10433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 10533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 10633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting ARGB to RGBA. 10733cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskARGBToRGBA = { 10833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u 10933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 11033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 11133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting ARGB to RGB24. 
11233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskARGBToRGB24 = { 11333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u 11433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 11533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 11633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for converting ARGB to RAW. 11733cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMaskARGBToRAW = { 11833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u 11933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 12033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 12133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { 12233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 12333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 12433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm5 \n" 12533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 12633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 12733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm0 \n" 12833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%0),%0 \n" 12933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm0 \n" 13033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 13133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm0 \n" 13233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm1 \n" 13333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm0 \n" 13433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por 
%%xmm5,%%xmm1 \n" 13533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 13633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1) \n" 13733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%1),%1 \n" 13833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 13933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 14033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_y), // %0 1417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(dst_argb), // %1 1427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(pix) // %2 14333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 14433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 14533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 14633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 14733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 14833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 14933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 15033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 15133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) { 15233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 15333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 15433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 15533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 15633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 15733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 15833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 15933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 
16033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1,1) \n" 16133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 16233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 16333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 16433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr), // %0 16533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 16633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 16733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskABGRToARGB) // %3 16833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 16933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 17033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 17133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 17233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 17333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 17433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 17533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) { 17633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 17733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 17833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 17933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 18033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 18133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 18233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 18333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 18433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1,1) \n" 
18533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 18633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 18733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra), // %0 18833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 18933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 19033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskBGRAToARGB) // %3 19133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 19233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 19333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 19433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 19533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 19633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 19733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 19833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix) { 19933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 20033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 20133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 20233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 20333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 20433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 20533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 20633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 20733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1,1) \n" 20833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 20933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 
21033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 21133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_rgba), // %0 21233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 21333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 21433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskRGBAToARGB) // %3 21533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 21633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 21733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 21833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 21933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 22033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 22133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 22233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRGBARow_SSSE3(const uint8* src_argb, uint8* dst_rgba, int pix) { 22333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 22433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 22533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 22633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 22733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 22833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 22933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 23033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 23133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1,1) \n" 23233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 23333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 23433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 23533cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp : "+r"(src_argb), // %0 23633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_rgba), // %1 23733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 23833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskARGBToRGBA) // %3 23933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 24033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 24133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 24233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 24333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 24433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 24533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 24633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { 24733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 24833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 24933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm5 \n" 25033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 25133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 25233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 25333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 25433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 25533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm3 \n" 25633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x30(%0),%0 \n" 25733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm2 \n" 25833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm1,%%xmm2 \n" 25933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb 
%%xmm4,%%xmm2 \n" 26033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm2 \n" 26133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0xc,%%xmm0,%%xmm1 \n" 26233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm0 \n" 26333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x20(%1) \n" 26433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm0 \n" 26533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm1 \n" 26633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 26733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm1 \n" 26833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x4,%%xmm3,%%xmm3 \n" 26933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm3 \n" 27033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1) \n" 27133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm3 \n" 27233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 27333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,0x30(%1) \n" 27433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%1),%1 \n" 27533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 27633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_rgb24), // %0 27733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 27833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 27933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskRGB24ToARGB) // %3 28033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 28133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 28233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 
28333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 28433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 2857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 2867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 2877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { 28833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 28933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 29033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm5 \n" 29133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 29233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 29333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 29433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 29533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 29633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm3 \n" 29733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x30(%0),%0 \n" 29833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm2 \n" 29933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm1,%%xmm2 \n" 30033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm2 \n" 30133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm2 \n" 30233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0xc,%%xmm0,%%xmm1 \n" 30333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm0 \n" 30433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x20(%1) \n" 30533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm0 \n" 30633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm1 \n" 
30733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 30833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm1 \n" 30933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x4,%%xmm3,%%xmm3 \n" 31033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm4,%%xmm3 \n" 31133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1) \n" 31233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm3 \n" 31333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 31433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,0x30(%1) \n" 31533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%1),%1 \n" 31633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 3177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "+r"(src_raw), // %0 3187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(dst_argb), // %1 3197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(pix) // %2 32033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskRAWToARGB) // %3 32133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 32233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 32333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 32433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 32533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 3267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 32733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 32833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { 32933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 33033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x1080108,%%eax \n" 
33133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm5 \n" 33233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 33333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x20802080,%%eax \n" 33433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm6 \n" 33533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm6,%%xmm6 \n" 33633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm3,%%xmm3 \n" 33733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xb,%%xmm3 \n" 33833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 33933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xa,%%xmm4 \n" 34033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x5,%%xmm4 \n" 34133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm7,%%xmm7 \n" 34233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm7 \n" 34333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 34433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 34533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 34633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 34733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 34833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 34933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 35033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm3,%%xmm1 \n" 35133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xb,%%xmm2 \n" 35233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm5,%%xmm1 \n" 35333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm5,%%xmm2 \n" 35433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm1 
\n" 35533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm2,%%xmm1 \n" 35633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm0 \n" 35733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm6,%%xmm0 \n" 35833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm7,%%xmm0 \n" 35933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm2 \n" 36033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm1 \n" 36133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm0,%%xmm2 \n" 36233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,(%1,%0,2) \n" 36333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x10(%1,%0,2) \n" 36433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 36533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 36633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 36733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 36833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 36933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 37033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 37133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "eax" 37233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 37333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 3747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 37533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 37633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 3777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 37833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { 
37933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 38033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x1080108,%%eax \n" 38133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm5 \n" 38233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 38333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x42004200,%%eax \n" 38433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm6 \n" 38533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm6,%%xmm6 \n" 38633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm3,%%xmm3 \n" 38733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xb,%%xmm3 \n" 38833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm4 \n" 38933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x6,%%xmm4 \n" 39033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm7,%%xmm7 \n" 39133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm7 \n" 39233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 39333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 39433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 39533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 39633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 39733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 39833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 39933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x1,%%xmm1 \n" 40033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xb,%%xmm2 \n" 40133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm3,%%xmm1 \n" 40233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm5,%%xmm2 \n" 
40333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm5,%%xmm1 \n" 40433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm1 \n" 40533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm2,%%xmm1 \n" 40633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 40733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm0 \n" 40833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm2 \n" 40933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm6,%%xmm0 \n" 41033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm7,%%xmm2 \n" 41133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm2,%%xmm0 \n" 41233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm2 \n" 41333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm1 \n" 41433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm0,%%xmm2 \n" 41533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,(%1,%0,2) \n" 41633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x10(%1,%0,2) \n" 41733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 41833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 41933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 42033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 42133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 42233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 42333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 42433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "eax" 42533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 42633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
"xmm5", "xmm6", "xmm7" 42733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 42833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 42933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 4307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 43133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { 43233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 43333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0xf0f0f0f,%%eax \n" 43433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm4 \n" 43533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm4,%%xmm4 \n" 43633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm5 \n" 43733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x4,%%xmm5 \n" 43833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 43933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 44033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 44133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 44233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 44333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 44433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm0 \n" 44533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm2 \n" 44633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 44733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 44833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x4,%%xmm1 \n" 44933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x4,%%xmm3 \n" 45033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm1,%%xmm0 \n" 
45133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm3,%%xmm2 \n" 45233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 45333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm0 \n" 45433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm2,%%xmm1 \n" 45533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,2) \n" 45633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1,%0,2) \n" 45733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 45833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 45933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 46033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 46133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 46233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 46333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 46433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "eax" 46533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 46633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 46733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 46833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 46933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 47033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 47133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { 47233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 47333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm6 \n" 47433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 47533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: 
\n" 47633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 47733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 47833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 47933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm3 \n" 48033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 48133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm0 \n" 48233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm1 \n" 48333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm2 \n" 48433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm3 \n" 48533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm4 \n" 48633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrldq $0x4,%%xmm1 \n" 48733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0xc,%%xmm4 \n" 48833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm5 \n" 48933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 49033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0x8,%%xmm5 \n" 49133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 49233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm1 \n" 49333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrldq $0x8,%%xmm2 \n" 49433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0x4,%%xmm3 \n" 49533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm3,%%xmm2 \n" 49633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1) \n" 49733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x20(%1) \n" 49833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x30(%1),%1 \n" 
49933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 50033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 50133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 50233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 50333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 50433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskARGBToRGB24) // %3 50533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 50633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 50733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 50833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 50933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 51033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 51133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 51233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { 51333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 51433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm6 \n" 51533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 51633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 51733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 51833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 51933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 52033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm3 \n" 52133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 52233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm0 \n" 52333cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "pshufb %%xmm6,%%xmm1 \n" 52433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm2 \n" 52533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm6,%%xmm3 \n" 52633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm4 \n" 52733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrldq $0x4,%%xmm1 \n" 52833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0xc,%%xmm4 \n" 52933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm5 \n" 53033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 53133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0x8,%%xmm5 \n" 53233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 53333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm5,%%xmm1 \n" 53433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrldq $0x8,%%xmm2 \n" 53533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslldq $0x4,%%xmm3 \n" 53633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm3,%%xmm2 \n" 53733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%1) \n" 53833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,0x20(%1) \n" 53933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x30(%1),%1 \n" 54033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 54133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 54233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 54333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 54433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 54533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMaskARGBToRAW) // %3 54633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 54733cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp#if defined(__SSE2__) 54833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 54933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 55033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 55133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 55233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 55333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { 55433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 55533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm3,%%xmm3 \n" 55633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x1b,%%xmm3 \n" 55733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 55833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x1a,%%xmm4 \n" 55933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x5,%%xmm4 \n" 56033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 56133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0xb,%%xmm5 \n" 56233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 56333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 56433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 56533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 56633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 56733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x8,%%xmm0 \n" 56833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x3,%%xmm1 \n" 56933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x5,%%xmm2 \n" 57033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrad $0x10,%%xmm0 \n" 57133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand 
%%xmm3,%%xmm1 \n" 57233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm2 \n" 57333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 57433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm2,%%xmm1 \n" 57533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm1,%%xmm0 \n" 57633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm0,%%xmm0 \n" 57733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 57833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 57933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 58033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 58133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 58233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 58333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 58433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 58533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 58633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 58733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 58833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 58933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 59033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 59133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 59233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 59333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { 59433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 59533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 59633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld 
$0x1b,%%xmm4 \n" 59733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm5 \n" 59833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x5,%%xmm5 \n" 59933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm6 \n" 60033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0xa,%%xmm6 \n" 60133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm7,%%xmm7 \n" 60233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0xf,%%xmm7 \n" 60333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 60433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 60533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 60633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 60733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 60833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm3 \n" 60933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrad $0x10,%%xmm0 \n" 61033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x3,%%xmm1 \n" 61133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x6,%%xmm2 \n" 61233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrld $0x9,%%xmm3 \n" 61333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm7,%%xmm0 \n" 61433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm1 \n" 61533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm2 \n" 61633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm3 \n" 61733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm1,%%xmm0 \n" 61833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm3,%%xmm2 \n" 61933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm2,%%xmm0 \n" 62033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw 
%%xmm0,%%xmm0 \n" 62133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 62233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 62333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 62433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 62533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 62633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 62733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 62833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 62933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 63033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 63133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 63233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 63333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 63433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 63533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 63633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 63733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { 63833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 63933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 64033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0xc,%%xmm4 \n" 64133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm3 \n" 64233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm3 \n" 64333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 64433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 64533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
(%0),%%xmm0 \n" 64633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 64733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm3,%%xmm0 \n" 64833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm4,%%xmm1 \n" 64933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlq $0x4,%%xmm0 \n" 65033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlq $0x8,%%xmm1 \n" 65133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm1,%%xmm0 \n" 65233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 65333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 65433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 65533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 65633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 65733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 65833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 65933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 66033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 66133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 66233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 66333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 66433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" 66533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 66633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 66733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 66833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 66933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 67033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm 
volatile ( 67133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 67233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 67333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 67433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 67533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 67633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 67733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 67833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm3 \n" 67933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 68033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 68133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 68233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 68333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 68433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 68533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 68633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 68733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 68833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 68933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 69033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 69133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 69233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 69333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 69433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 
"+r"(src_argb), // %0 69533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 69633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 69733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kARGBToY), // %3 69833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 69933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 70033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 70133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 70233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 70333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 70433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 70533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 70633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 70733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 70833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 70933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 71033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 71133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 71233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 71333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 71433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm2 \n" 71533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm3 \n" 71633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 71733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 71833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 
71933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 72033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 72133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 72233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 72333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 72433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 72533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 72633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 72733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 72833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 72933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 73033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 73133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 73233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 73333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 73433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kARGBToY), // %3 73533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 73633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 73733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 73833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 73933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 74033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 74133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 74233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 74333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// TODO(fbarchard): pass 
xmm constants to single block of assembly. 74433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes 74533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers, 74633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around 74733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// and considered unsafe. 74833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, 74933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 75033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 75133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 75233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %1,%%xmm3 \n" 75333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 75433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 75533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kARGBToU), // %0 75633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kARGBToV), // %1 75733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 75833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 75933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 76033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 76133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 76233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 76333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 76433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 76533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
0x20(%0),%%xmm2 \n" 76633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm6 \n" 76733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%0,%4,1),%%xmm0 \n" 76833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x10(%0,%4,1),%%xmm1 \n" 76933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x20(%0,%4,1),%%xmm2 \n" 77033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x30(%0,%4,1),%%xmm6 \n" 77133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 77233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm7 \n" 77333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 77433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 77533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 77633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 77733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 77833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 77933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 78033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 78133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 78233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 78333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 78433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 78533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 78633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 78733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 
78833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 78933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 79033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 79133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 79233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 79333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 79433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 79533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 79633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 79733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb0), // %0 79833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 79933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 80033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 80133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_argb)) 80233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 80333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 80433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 80533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 80633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 80733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 80833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 80933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 81033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 81133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 
81233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 81333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %1,%%xmm3 \n" 81433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 81533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 81633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kARGBToU), // %0 81733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kARGBToV), // %1 81833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 81933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 82033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 82133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 82233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 82333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 82433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 82533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 82633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm2 \n" 82733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm6 \n" 82833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%4,1),%%xmm7 \n" 82933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 83033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0,%4,1),%%xmm7 \n" 83133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm1 \n" 83233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0,%4,1),%%xmm7 \n" 83333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 83433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0,%4,1),%%xmm7 \n" 83533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm6 \n" 
83633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 83733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm7 \n" 83833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 83933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 84033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 84133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 84233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 84333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 84433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 84533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 84633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 84733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 84833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 84933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 85033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 85133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 85233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 85333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 85433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 85533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 85633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 85733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 85833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 
85933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 86033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 86133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 86233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb0), // %0 86333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 86433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 86533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 86633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_argb)) 86733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 86833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 86933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 87033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 87133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 87233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 87333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 87433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { 87533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 87633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 87733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 87833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 87933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 88033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 88133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 88233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 88333cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "movdqa 0x30(%0),%%xmm3 \n" 88433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 88533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 88633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 88733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 88833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 88933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 89033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 89133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 89233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 89333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 89433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 89533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 89633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 89733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 89833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 89933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra), // %0 90033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 90133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 90233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kBGRAToY), // %3 90333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 90433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 90533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 90633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 
90733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 90833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 90933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 91033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 91133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { 91233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 91333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 91433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 91533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 91633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 91733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 91833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 91933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm2 \n" 92033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm3 \n" 92133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 92233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 92333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 92433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 92533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 92633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 92733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 92833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 92933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 93033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 
93133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 93233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 93333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 93433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 93533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 93633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra), // %0 93733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 93833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 93933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kBGRAToY), // %3 94033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 94133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 94233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 94333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 94433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 94533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 94633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 94733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 94833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, 94933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 95033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 95133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 95233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %1,%%xmm3 \n" 95333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 95433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 95533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 
"m"(kBGRAToU), // %0 95633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kBGRAToV), // %1 95733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 95833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 95933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 96033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 96133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 96233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 96333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 96433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 96533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 96633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm6 \n" 96733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%0,%4,1),%%xmm0 \n" 96833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x10(%0,%4,1),%%xmm1 \n" 96933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x20(%0,%4,1),%%xmm2 \n" 97033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x30(%0,%4,1),%%xmm6 \n" 97133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 97233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm7 \n" 97333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 97433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 97533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 97633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 97733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 97833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 
97933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 98033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 98133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 98233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 98333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 98433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 98533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 98633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 98733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 98833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 98933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 99033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 99133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 99233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 99333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 99433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 99533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 99633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 99733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra0), // %0 99833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 99933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 100033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 100133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_bgra)) 
100233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 100333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 100433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 100533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 100633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 100733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 100833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 100933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, 101033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 101133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 101233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 101333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %1,%%xmm3 \n" 101433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 101533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 101633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kBGRAToU), // %0 101733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kBGRAToV), // %1 101833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 101933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 102033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 102133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 102233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 102333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 102433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 102533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 102633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 
0x20(%0),%%xmm2 \n" 102733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm6 \n" 102833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%4,1),%%xmm7 \n" 102933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 103033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0,%4,1),%%xmm7 \n" 103133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm1 \n" 103233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0,%4,1),%%xmm7 \n" 103333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 103433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0,%4,1),%%xmm7 \n" 103533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm6 \n" 103633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 103733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm7 \n" 103833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 103933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 104033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 104133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 104233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 104333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 104433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 104533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 104633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 104733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 104833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 
104933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 105033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 105133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 105233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 105333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 105433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 105533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 105633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 105733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 105833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 105933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 106033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 106133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 106233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra0), // %0 106333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 106433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 106533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 106633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_bgra)) 106733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 106833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 106933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 107033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 107133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 107233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 
107333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 107433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { 107533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 107633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 107733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 107833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 107933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 108033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 108133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 108233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 108333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm3 \n" 108433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 108533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 108633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 108733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 108833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 108933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 109033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 109133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 109233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 109333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 109433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 109533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 
109633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 109733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 109833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 109933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr), // %0 110033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 110133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 110233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kABGRToY), // %3 110333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 110433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 110533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 110633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 110733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 110833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 110933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 111033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 111133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { 111233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 111333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm5 \n" 111433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm4 \n" 111533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 111633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 111733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 111833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 111933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm2 \n" 
112033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm3 \n" 112133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 112233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm1 \n" 112333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 112433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm3 \n" 112533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 112633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm1,%%xmm0 \n" 112733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm3,%%xmm2 \n" 112833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 112933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm2 \n" 113033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 113133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 113233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 113333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 113433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 113533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 113633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr), // %0 113733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 113833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 113933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kABGRToY), // %3 114033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddY16) // %4 114133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 114233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 114333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , 
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 114433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 114533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 114633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 114733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 114833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, 114933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 115033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 115133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 115233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %1,%%xmm3 \n" 115333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 115433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 115533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kABGRToU), // %0 115633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kABGRToV), // %1 115733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 115833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 115933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 116033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 116133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 116233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 116333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 116433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 116533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 116633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%0),%%xmm6 \n" 116733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%0,%4,1),%%xmm0 \n" 
116833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x10(%0,%4,1),%%xmm1 \n" 116933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x20(%0,%4,1),%%xmm2 \n" 117033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb 0x30(%0,%4,1),%%xmm6 \n" 117133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 117233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm7 \n" 117333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 117433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 117533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 117633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 117733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 117833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 117933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 118033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 118133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 118233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 118333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 118433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 118533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 118633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 118733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 118833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 118933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 
119033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 119133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 119233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 119333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 119433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 119533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 119633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 119733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr0), // %0 119833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 119933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 120033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 120133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_abgr)) 120233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 120333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 120433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 120533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 120633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 120733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 120833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 120933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, 121033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int width) { 121133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 121233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %0,%%xmm4 \n" 121333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
%1,%%xmm3 \n" 121433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %2,%%xmm5 \n" 121533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 121633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kABGRToU), // %0 121733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kABGRToV), // %1 121833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "m"(kAddUV128) // %2 121933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 122033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 122133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 122233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 122333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 122433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 122533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 122633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0),%%xmm2 \n" 122733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0),%%xmm6 \n" 122833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%4,1),%%xmm7 \n" 122933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 123033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0,%4,1),%%xmm7 \n" 123133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm1 \n" 123233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x20(%0,%4,1),%%xmm7 \n" 123333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 123433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x30(%0,%4,1),%%xmm7 \n" 123533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm6 \n" 123633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 123733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
%%xmm0,%%xmm7 \n" 123833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 123933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm7 \n" 124033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm0 \n" 124133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm7 \n" 124233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm6,%%xmm2 \n" 124333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm6,%%xmm7 \n" 124433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm7,%%xmm2 \n" 124533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 124633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 124733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm0 \n" 124833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm4,%%xmm2 \n" 124933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm1 \n" 125033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm3,%%xmm6 \n" 125133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm2,%%xmm0 \n" 125233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "phaddw %%xmm6,%%xmm1 \n" 125333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm0 \n" 125433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x8,%%xmm1 \n" 125533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packsswb %%xmm1,%%xmm0 \n" 125633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddb %%xmm5,%%xmm0 \n" 125733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 125833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlps %%xmm0,(%1) \n" 125933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps %%xmm0,(%1,%2,1) \n" 
126033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 126133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 126233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr0), // %0 126333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 126433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 126533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %3 126633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride_abgr)) 126733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 126833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 126933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 127033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 127133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 127233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 127333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBTOYROW_SSSE3 127433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 127533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TOARGBROW_SSSE3 127633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define UB 127 /* min(63,static_cast<int8>(2.018 * 64)) */ 127733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define UG -25 /* static_cast<int8>(-0.391 * 64 - 0.5) */ 127833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define UR 0 127933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 128033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define VB 0 128133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define VG -52 /* static_cast<int8>(-0.813 * 64 - 0.5) */ 128233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define VR 102 /* static_cast<int8>(1.596 * 64 + 0.5) */ 
128333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 128433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bias 128533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BB UB * 128 + VB * 128 128633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BG UG * 128 + VG * 128 128733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BR UR * 128 + VR * 128 128833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 128933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */ 129033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 129133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstruct { 129233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kUVToB; // 0 129333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kUVToG; // 16 129433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kUVToR; // 32 129533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec16 kUVBiasB; // 48 129633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec16 kUVBiasG; // 64 129733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec16 kUVBiasR; // 80 129833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec16 kYSub16; // 96 129933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec16 kYToRgb; // 112 130033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kVUToB; // 128 130133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kVUToG; // 144 130233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp vec8 kVUToR; // 160 130333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} CONST SIMD_ALIGNED(kYuvConstants) = { 130433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB }, 130533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, 
130633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR }, 130733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { BB, BB, BB, BB, BB, BB, BB, BB }, 130833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { BG, BG, BG, BG, BG, BG, BG, BG }, 130933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { BR, BR, BR, BR, BR, BR, BR, BR }, 131033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { 16, 16, 16, 16, 16, 16, 16, 16 }, 131133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { YG, YG, YG, YG, YG, YG, YG, YG }, 131233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB }, 131333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, 131433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR } 131533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 131633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 131733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 131833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 8 UV from 411 131933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READYUV444 \ 132033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%[u_buf]),%%xmm0 \n" \ 132133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%[u_buf],%[v_buf],1),%%xmm1 \n" \ 132233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%[u_buf]),%[u_buf] \n" \ 132333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" \ 132433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 132533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 4 UV from 422, upsample to 8 UV 132633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READYUV422 \ 
132733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%[u_buf]),%%xmm0 \n" \ 132833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%[u_buf],%[v_buf],1),%%xmm1 \n" \ 132933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%[u_buf]),%[u_buf] \n" \ 133033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" \ 133133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm0 \n" \ 133233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 133333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 2 UV from 411, upsample to 8 UV 133433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READYUV411 \ 133533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%[u_buf]),%%xmm0 \n" \ 133633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%[u_buf],%[v_buf],1),%%xmm1 \n" \ 133733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x2(%[u_buf]),%[u_buf] \n" \ 133833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" \ 133933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm0 \n" \ 134033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm0,%%xmm0 \n" \ 134133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 134233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 4 UV from NV12, upsample to 8 UV 134333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READNV12 \ 134433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%[uv_buf]),%%xmm0 \n" \ 134533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%[uv_buf]),%[uv_buf] \n" \ 134633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm0 \n" \ 134733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 134833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Convert 8 pixels: 8 UV and 8 Y 
134933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define YUVTORGB \ 135033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" \ 135133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" \ 135233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw (%[kYuvConstants]),%%xmm0 \n" \ 135333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw 16(%[kYuvConstants]),%%xmm1 \n" \ 135433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw 32(%[kYuvConstants]),%%xmm2 \n" \ 135533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 48(%[kYuvConstants]),%%xmm0 \n" \ 135633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 64(%[kYuvConstants]),%%xmm1 \n" \ 135733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 80(%[kYuvConstants]),%%xmm2 \n" \ 135833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%[y_buf]),%%xmm3 \n" \ 135933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%[y_buf]),%[y_buf] \n" \ 136033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm4,%%xmm3 \n" \ 136133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubsw 96(%[kYuvConstants]),%%xmm3 \n" \ 136233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw 112(%[kYuvConstants]),%%xmm3 \n" \ 136333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm0 \n" \ 136433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm1 \n" \ 136533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm2 \n" \ 136633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm0 \n" \ 136733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm1 \n" \ 136833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm2 \n" \ 136933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" \ 
137033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" \ 137133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm2 \n" \ 137233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 137333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Convert 8 pixels: 8 VU and 8 Y 137433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define YVUTORGB \ 137533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" \ 137633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" \ 137733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw 128(%[kYuvConstants]),%%xmm0 \n" \ 137833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw 144(%[kYuvConstants]),%%xmm1 \n" \ 137933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw 160(%[kYuvConstants]),%%xmm2 \n" \ 138033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 48(%[kYuvConstants]),%%xmm0 \n" \ 138133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 64(%[kYuvConstants]),%%xmm1 \n" \ 138233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw 80(%[kYuvConstants]),%%xmm2 \n" \ 138333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%[y_buf]),%%xmm3 \n" \ 138433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%[y_buf]),%[y_buf] \n" \ 138533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm4,%%xmm3 \n" \ 138633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubsw 96(%[kYuvConstants]),%%xmm3 \n" \ 138733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw 112(%[kYuvConstants]),%%xmm3 \n" \ 138833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm0 \n" \ 138933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm1 \n" \ 139033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddsw %%xmm3,%%xmm2 \n" \ 
139133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm0 \n" \ 139233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm1 \n" \ 139333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psraw $0x6,%%xmm2 \n" \ 139433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" \ 139533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" \ 139633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm2 \n" \ 139733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 139833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, 139933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 140033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 140133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 140233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 140333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 140433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 140533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 140633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 140733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 140833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 140933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV444 141033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 141133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 141233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 141333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 141433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 
141533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 141633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%[argb_buf]) \n" 141733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 141833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 141933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 142033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 142133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 142233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 142333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 142433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 142533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 142633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 142733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 142833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 142933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 143033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 143133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 143233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 143333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 143433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, 143533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 143633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 143733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
uint8* argb_buf, 143833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 143933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 144033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 144133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 144233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 144333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 144433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 144533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 144633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 144733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 144833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 144933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 145033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 145133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 145233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%[argb_buf]) \n" 145333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 145433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 145533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 145633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 145733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 145833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 145933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 146033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 
146133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 146233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 146333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 146433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 146533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 146633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 146733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 146833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 146933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 147033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 147133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 147233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 147333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 147433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 147533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 147633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 147733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 147833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 147933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 148033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 148133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV411 148233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 148333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 148433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 
148533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 148633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 148733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 148833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%[argb_buf]) \n" 148933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 149033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 149133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 149233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 149333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 149433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 149533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 149633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 149733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 149833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 149933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 150033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 150133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 150233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 150333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 150433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 150533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 150633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 
150733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* uv_buf, 150833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 150933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 151033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 151133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 151233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 151333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 151433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 151533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV12 151633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 151733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 151833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 151933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 152033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 152133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 152233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%[argb_buf]) \n" 152333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 152433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 152533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 152633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 152733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 152833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [uv_buf]"+r"(uv_buf), // %[uv_buf] 152933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 
153033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 153133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 153233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 153333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 153433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 153533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 153633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 153733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 153833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 153933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, 154033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* vu_buf, 154133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 154233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 154333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 154433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 154533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 154633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 154733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 154833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV12 154933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YVUTORGB 155033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 155133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 155233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 155333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 
155433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 155533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%[argb_buf]) \n" 155633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 155733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 155833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 155933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 156033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 156133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [uv_buf]"+r"(vu_buf), // %[uv_buf] 156233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 156333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 156433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 156533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 156633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 156733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 156833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 156933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 157033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 157133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 157233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 157333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 157433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 157533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 157633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int 
width) { 157733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 157833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 157933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 158033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 158133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 158233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 158333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV444 158433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 158533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 158633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 158733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 158833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 158933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 159033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%[argb_buf]) \n" 159133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 159233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 159333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 159433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 159533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 159633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 159733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 159833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 159933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // 
%[width] 160033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 160133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 160233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 160333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 160433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 160533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 160633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 160733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 160833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 160933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 161033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 161133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 161233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 161333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 161433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 161533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 161633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 161733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 161833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 161933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 162033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 162133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 162233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 162333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 
162433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 162533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 162633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%[argb_buf]) \n" 162733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 162833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 162933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 163033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 163133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 163233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 163333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 163433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 163533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 163633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 163733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 163833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 163933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 164033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 164133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 164233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 164333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 164433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 164533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 
164633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 164733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 164833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 164933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 165033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 165133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 165233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 165333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 165433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 165533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV411 165633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 165733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 165833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 165933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 166033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 166133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 166233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%[argb_buf]) \n" 166333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 166433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 166533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 166633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 166733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 166833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 166933cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 167033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 167133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 167233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 167333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 167433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 167533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 167633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 167733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 167833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 167933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 168033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 168133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* uv_buf, 168233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 168333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 168433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 168533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 168633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 168733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 168833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 168933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV12 169033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 169133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 169233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 
169333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 169433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 169533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 169633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%[argb_buf]) \n" 169733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 169833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 169933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 170033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 170133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 170233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [uv_buf]"+r"(uv_buf), // %[uv_buf] 170333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 170433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 170533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 170633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 170733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 170833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 170933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 171033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 171133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 171233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 171333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 171433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* vu_buf, 
171533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* argb_buf, 171633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 171733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 171833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 171933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 172033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 172133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 172233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV12 172333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YVUTORGB 172433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 172533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm2 \n" 172633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 172733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 172833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm1 \n" 172933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%[argb_buf]) \n" 173033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 173133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 173233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 173333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 173433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 173533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [uv_buf]"+r"(vu_buf), // %[uv_buf] 173633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(argb_buf), // %[argb_buf] 173733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 
173833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 173933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 174033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 174133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 174233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 174333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 174433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 174533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 174633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, 174733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 174833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 174933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* bgra_buf, 175033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 175133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 175233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 175333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 175433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 175533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 175633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 175733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 175833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 175933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 176033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm1 \n" 176133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm5 \n" 
176233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,%%xmm0 \n" 176333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm1,%%xmm5 \n" 176433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm0 \n" 176533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,(%[argb_buf]) \n" 176633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,0x10(%[argb_buf]) \n" 176733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 176833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 176933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 177033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 177133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 177233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 177333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(bgra_buf), // %[argb_buf] 177433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 177533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 177633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 177733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 177833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 177933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 178033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 178133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 178233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 178333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, 
178433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 178533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 178633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* abgr_buf, 178733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 178833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 178933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 179033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 179133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 179233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 179333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 179433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 179533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 179633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm2 \n" 179733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm0 \n" 179833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm1 \n" 179933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm2 \n" 180033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm0,%%xmm1 \n" 180133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,(%[argb_buf]) \n" 180233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%[argb_buf]) \n" 180333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 180433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 180533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 180633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 180733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
[u_buf]"+r"(u_buf), // %[u_buf] 180833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 180933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(abgr_buf), // %[argb_buf] 181033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 181133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 181233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 181333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 181433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 181533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 181633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 181733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 181833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 181933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, 182033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 182133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 182233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* bgra_buf, 182333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 182433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 182533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 182633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 182733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 182833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 182933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 183033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 
183133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 183233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 183333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm1 \n" 183433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm5 \n" 183533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,%%xmm0 \n" 183633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm1,%%xmm5 \n" 183733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm0 \n" 183833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm5,(%[argb_buf]) \n" 183933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,0x10(%[argb_buf]) \n" 184033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 184133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 184233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 184333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 184433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 184533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 184633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(bgra_buf), // %[argb_buf] 184733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 184833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 184933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 185033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 185133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 185233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 
185333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 185433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 185533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 185633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, 185733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 185833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 185933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* abgr_buf, 186033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 186133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 186233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %[u_buf],%[v_buf] \n" 186333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 186433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 186533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 186633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 186733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 186833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUVTORGB 186933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm2 \n" 187033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm0 \n" 187133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm1 \n" 187233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm2 \n" 187333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm0,%%xmm1 \n" 187433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm2,(%[argb_buf]) \n" 187533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm1,0x10(%[argb_buf]) \n" 187633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%[argb_buf]),%[argb_buf] \n" 
187733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%[width] \n" 187833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 187933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [y_buf]"+r"(y_buf), // %[y_buf] 188033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [u_buf]"+r"(u_buf), // %[u_buf] 188133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [v_buf]"+r"(v_buf), // %[v_buf] 188233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [argb_buf]"+r"(abgr_buf), // %[argb_buf] 188333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp [width]"+rm"(width) // %[width] 188433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] 188533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 188633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 188733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 188833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 188933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 189033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 189133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TOARGBROW_SSSE3 189233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 189333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_YTOARGBROW_SSE2 189433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YToARGBRow_SSE2(const uint8* y_buf, 189533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 189633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 189733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 189833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 189933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm4 \n" 
190033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x10001000,%%eax \n" 190133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm3 \n" 190233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm3,%%xmm3 \n" 190333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov $0x012a012a,%%eax \n" 190433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%eax,%%xmm2 \n" 190533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm2,%%xmm2 \n" 190633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 190733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 190833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 190933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm0 \n" 191033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%0),%0 \n" 191133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm0 \n" 191233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubusw %%xmm3,%%xmm0 \n" 191333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm2,%%xmm0 \n" 191433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 191533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 191633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Step 2: Weave into ARGB 191733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm0 \n" 191833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 191933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm0,%%xmm0 \n" 192033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm1 \n" 192133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 192233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm1 \n" 
192333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 192433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,16(%1) \n" 192533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 32(%1),%1 \n" 192633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 192733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 192833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 19297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "+r"(y_buf), // %0 19307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(rgb_buf), // %1 193133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width) // %2 193233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 193333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "eax" 193433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 193533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" 193633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 193733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 193833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 193933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_YTOARGBROW_SSE2 19407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 194133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_MIRRORROW_SSSE3 194233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for reversing the bytes. 
194333cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMirror = { 194433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u 194533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 194633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 194733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { 194833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t temp_width = static_cast<intptr_t>(width); 194933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 195033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 195133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea -0x10(%0),%0 \n" 195233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 195333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 195433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%2),%%xmm0 \n" 195533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 195633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 195733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 195833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 195933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 196033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 196133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 196233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(temp_width) // %2 196333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMirror) // %3 196433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 196533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 
196633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 196733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 196833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 19697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 197033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_MIRRORROW_SSSE3 19717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 197233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_MIRRORROW_SSE2 197333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { 197433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t temp_width = static_cast<intptr_t>(width); 197533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 197633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea -0x10(%0),%0 \n" 197733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 197833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 197933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%2),%%xmm0 \n" 198033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 198133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm0 \n" 198233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 198333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm1,%%xmm0 \n" 198433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0x1b,%%xmm0,%%xmm0 \n" 198533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufhw $0x1b,%%xmm0,%%xmm0 \n" 198633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x4e,%%xmm0,%%xmm0 \n" 198733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 198833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 198933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 
199033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 199133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 199233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 199333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(temp_width) // %2 199433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 199533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 199633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 199733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 199833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 199933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 200033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 200133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_MIRRORROW_SSE2 200233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 200333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_MIRRORROW_UV_SSSE3 200433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for reversing the bytes of UV channels. 
200533cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleMirrorUV = { 200633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u 200733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 200833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, 200933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 201033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t temp_width = static_cast<intptr_t>(width); 201133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 201233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %4,%%xmm1 \n" 201333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea -16(%0,%3,2),%0 \n" 201433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 201533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 201633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 201733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 201833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea -16(%0),%0 \n" 201933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm1,%%xmm0 \n" 202033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $8,%3 \n" 202133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm0,(%1) \n" 202233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm0,(%1,%2) \n" 202333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 8(%1),%1 \n" 202433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 202533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 202633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 202733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 
202833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(temp_width) // %3 202933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleMirrorUV) // %4 203033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 203133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 203233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 203333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 203433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 203533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 203633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_MIRRORROW_UV_SSSE3 203733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 203833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBMIRRORROW_SSSE3 203933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for reversing the bytes. 204033cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kARGBShuffleMirror = { 204133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u 204233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 204333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 204433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { 204533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t temp_width = static_cast<intptr_t>(width); 204633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 204733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %3,%%xmm5 \n" 204833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea -0x10(%0),%0 \n" 204933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 205033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 205133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
(%0,%2,4),%%xmm0 \n" 205233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %%xmm5,%%xmm0 \n" 205333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 205433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 205533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 205633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 205733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 205833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 205933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(temp_width) // %2 206033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kARGBShuffleMirror) // %3 206133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 206233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 206333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm5" 206433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 206533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 206633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 206733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBMIRRORROW_SSSE3 206833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 206933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_SPLITUV_SSE2 207033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { 207133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 207233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 207333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 207433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 207533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 
207633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 207733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 207833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 207933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 208033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 208133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm3 \n" 208233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 208333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 208433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 208533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 208633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm3 \n" 208733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm3,%%xmm2 \n" 208833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 208933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,(%1,%2) \n" 209033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 209133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 209233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 209333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uv), // %0 209433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 209533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 209633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 209733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 209833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 209933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 
210033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 210133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 210233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 210333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 210433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_SPLITUV_SSE2 210533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 210633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_COPYROW_SSE2 210733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid CopyRow_SSE2(const uint8* src, uint8* dst, int count) { 210833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 210933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 211033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 211133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 211233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 211333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 211433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1) \n" 211533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,0x10(%0,%1) \n" 211633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 211733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x20,%2 \n" 211833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 211933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 212033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 212133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(count) // %2 212233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 212333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 212433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 
212533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 212633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 212733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 212833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 212933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_COPYROW_SSE2 213033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 213133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_COPYROW_X86 213233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid CopyRow_X86(const uint8* src, uint8* dst, int width) { 213333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp size_t width_tmp = static_cast<size_t>(width); 213433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 213533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr $0x2,%2 \n" 213633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "rep movsl \n" 213733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+S"(src), // %0 213833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+D"(dst), // %1 213933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+c"(width_tmp) // %2 214033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 214133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 214233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 214333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 214433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_COPYROW_X86 214533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 214633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_SETROW_X86 214733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SetRow8_X86(uint8* dst, uint32 v32, int width) { 214833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp size_t width_tmp = static_cast<size_t>(width); 214933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 
215033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr $0x2,%1 \n" 215133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "rep stosl \n" 215233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+D"(dst), // %0 215333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+c"(width_tmp) // %1 215433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "a"(v32) // %2 215533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc"); 215633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 215733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 215833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SetRows32_X86(uint8* dst, uint32 v32, int width, 215933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_stride, int height) { 216033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int y = 0; y < height; ++y) { 216133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp size_t width_tmp = static_cast<size_t>(width); 216233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32* d = reinterpret_cast<uint32*>(dst); 216333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 216433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "rep stosl \n" 216533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+D"(d), // %0 216633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+c"(width_tmp) // %1 216733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "a"(v32) // %2 216833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc"); 216933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += dst_stride; 21707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 21717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 217233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_SETROW_X86 217333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 217433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef 
HAS_YUY2TOYROW_SSE2 217533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { 217633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 217733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 217833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 217933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 218033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 218133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 218233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 218333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 218433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 218533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 218633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 218733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 218833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 218933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 219033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 219133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 219233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 219333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 219433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 219533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 219633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 219733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 219833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 
219933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 220033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 220133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 220233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, 220333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 220433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 220533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 220633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 220733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 220833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 220933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 221033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 221133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 221233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4,1),%%xmm2 \n" 221333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0,%4,1),%%xmm3 \n" 221433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 221533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 221633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 221733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 221833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 221933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 222033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 222133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 222233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb 
%%xmm0,%%xmm0 \n" 222333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 222433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 222533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 222633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 222733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 222833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 222933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 223033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 223133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 223233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 223333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 223433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(stride_yuy2)) // %4 223533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 223633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 223733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 223833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 223933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 224033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 224133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 224233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUV422Row_SSE2(const uint8* src_yuy2, 224333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 224433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 224533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 224633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 
224733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 224833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 224933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 225033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 225133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 225233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 225333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 225433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 225533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 225633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 225733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 225833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 225933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 226033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 226133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 226233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 226333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 226433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 226533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 226633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 226733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 226833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 226933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 227033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 
227133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 227233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 227333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 227433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 227533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 227633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 227733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 227833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, 227933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_y, int pix) { 228033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 228133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 228233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 228333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 228433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 228533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 228633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 228733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 228833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 228933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 229033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 229133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 229233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 229333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 229433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 229533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 
"+r"(src_yuy2), // %0 229633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 229733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 229833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 229933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 230033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 230133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 230233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 230333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 230433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 230533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 230633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, 230733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int stride_yuy2, 230833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 230933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 231033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 231133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 231233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 231333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 231433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 231533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 231633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 231733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%4,1),%%xmm2 \n" 231833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0,%4,1),%%xmm3 \n" 231933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 232033cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 232133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 232233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 232333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 232433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 232533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 232633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 232733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 232833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 232933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 233033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 233133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 233233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 233333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 233433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 233533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 233633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 233733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 233833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 233933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(stride_yuy2)) // %4 234033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 234133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 234233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 234333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 
234433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 234533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 234633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 234733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, 234833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 234933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 235033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 235133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 235233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 235333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 235433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 235533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 235633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 235733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 235833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 235933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 236033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 236133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 236233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 236333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 236433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 236533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 236633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 236733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 
236833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 236933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 237033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 237133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 237233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 237333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 237433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 237533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 237633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 237733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 237833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 237933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 238033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 238133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 238233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 238333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { 238433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 238533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 238633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 238733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 238833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 238933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 239033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 239133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 239233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 
239333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 239433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 239533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 239633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 239733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 239833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 239933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 240033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 240133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 240233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 240333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 240433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 240533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 240633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 240733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 240833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, 240933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 241033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 241133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 241233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 241333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 241433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 241533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 241633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 241733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 
241833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4,1),%%xmm2 \n" 241933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0,%4,1),%%xmm3 \n" 242033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 242133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 242233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 242333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 242433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 242533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 242633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 242733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 242833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 242933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 243033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 243133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 243233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 243333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 243433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 243533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 243633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 243733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 243833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 243933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 244033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(stride_uyvy)) // %4 
244133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 244233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 244333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 244433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 244533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 244633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 244733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 244833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUV422Row_SSE2(const uint8* src_uyvy, 244933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 245033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 245133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 245233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 245333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 245433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 245533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 245633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 245733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 245833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 245933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 246033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 246133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 246233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 246333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 246433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 
246533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 246633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 246733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 246833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 246933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 247033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 247133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 247233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 247333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 247433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 247533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 247633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 247733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 247833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 247933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 248033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 248133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 248233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 248333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 248433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, 248533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_y, int pix) { 248633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 248733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 248833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 248933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 
249033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 249133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 249233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 249333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 249433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 249533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 249633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%1) \n" 249733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 249833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 249933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 250033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 250133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 250233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 250333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 250433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 250533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 250633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 250733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 250833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 250933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 251033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, 251133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 251233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 251333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 251433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"psrlw $0x8,%%xmm5 \n" 251533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 251633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 251733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 251833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 251933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 252033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0,%4,1),%%xmm2 \n" 252133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0,%4,1),%%xmm3 \n" 252233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 252333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 252433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 252533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 252633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 252733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 252833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 252933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 253033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 253133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 253233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 253333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 253433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 253533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 253633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 253733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 
253833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 253933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 254033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 254133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 254233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(stride_uyvy)) // %4 254333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 254433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 254533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 254633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 254733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 254833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 254933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 255033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, 255133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 255233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 255333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 255433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm5 \n" 255533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 255633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 255733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 255833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm0 \n" 255933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu 0x10(%0),%%xmm1 \n" 256033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 256133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 
256233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 256333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 256433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 256533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm0 \n" 256633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 256733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 256833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm1 \n" 256933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 257033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1,%2) \n" 257133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%1),%1 \n" 257233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%3 \n" 257333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 257433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 257533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 257633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 257733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 257833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 257933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 258033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 258133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm5" 258233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 258333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 258433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 258533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_YUY2TOYROW_SSE2 258633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
258733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBBLENDROW_SSE2 258833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blend 8 pixels at a time. 258933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, 259033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_argb, int width) { 259133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 259233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm7,%%xmm7 \n" 259333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0xf,%%xmm7 \n" 259433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm6,%%xmm6 \n" 259533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm6 \n" 259633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 259733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm5 \n" 259833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 259933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm4 \n" 260033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 260133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 91f \n" 260233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 99f \n" 260333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 260433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop until destination pointer is aligned. 
260533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "10: \n" 260633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "test $0xf,%2 \n" 260733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 19f \n" 260833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm3 \n" 260933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%0),%0 \n" 261033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 261133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 261233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm2 \n" 261333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm3 \n" 261433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufhw $0xf5,%%xmm3,%%xmm3 \n" 261533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0xf5,%%xmm3,%%xmm3 \n" 261633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 261733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 261833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 261933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm1 \n" 262033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%1),%1 \n" 262133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 262233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 262333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 262433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 262533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 262633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 262733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 262833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"sub $0x1,%3 \n" 262933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 263033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%2),%2 \n" 263133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 10b \n" 263233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 263333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "19: \n" 263433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $1-4,%3 \n" 263533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 49f \n" 26367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 263733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop. 263833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 263933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "41: \n" 264033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm3 \n" 264133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 264233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 264333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 264433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%1),%%xmm2 \n" 264533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm3 \n" 264633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufhw $0xf5,%%xmm3,%%xmm3 \n" 264733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0xf5,%%xmm3,%%xmm3 \n" 264833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 264933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 265033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 265133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%1),%%xmm1 \n" 265233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 265333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"psrlw $0x8,%%xmm1 \n" 265433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 265533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 265633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 265733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 265833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 265933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 266033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 266133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%2) \n" 266233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 266333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 41b \n" 266433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 266533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "49: \n" 266633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x3,%3 \n" 266733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 99f \n" 266833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 266933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop. 
267033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "91: \n" 267133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm3 \n" 267233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%0),%0 \n" 267333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 267433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 267533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm2 \n" 267633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm3 \n" 267733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufhw $0xf5,%%xmm3,%%xmm3 \n" 267833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0xf5,%%xmm3,%%xmm3 \n" 267933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 268033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 268133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 268233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm1 \n" 268333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%1),%1 \n" 268433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 268533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 268633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 268733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 268833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 268933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 269033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 269133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 269233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 269333cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "lea 0x4(%2),%2 \n" 269433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 91b \n" 269533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "99: \n" 269633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb0), // %0 269733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_argb1), // %1 269833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %2 269933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 270033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 270133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 270233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 270333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 270433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 270533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 270633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 270733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBBLENDROW_SSE2 270833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 270933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBBLENDROW_SSSE3 271033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for isolating alpha. 271133cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampCONST uvec8 kShuffleAlpha = { 271233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 271333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 271433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}; 271533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 271633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blend 8 pixels at a time 271733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shuffle table for reversing the bytes. 
271833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 271933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Same as SSE2, but replaces 272033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// psrlw xmm3, 8 // alpha 272133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// pshufhw xmm3, xmm3,0F5h // 8 alpha words 272233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// pshuflw xmm3, xmm3,0F5h 272333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// with.. 272433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// pshufb xmm3, kShuffleAlpha // alpha 272533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 272633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, 272733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_argb, int width) { 272833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 272933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm7,%%xmm7 \n" 273033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0xf,%%xmm7 \n" 273133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm6,%%xmm6 \n" 273233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm6 \n" 273333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm5,%%xmm5 \n" 273433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psllw $0x8,%%xmm5 \n" 273533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm4,%%xmm4 \n" 273633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm4 \n" 273733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 273833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 91f \n" 273933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 99f \n" 274033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 274133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 
pixel loop until destination pointer is aligned. 274233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "10: \n" 274333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "test $0xf,%2 \n" 274433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 19f \n" 274533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm3 \n" 274633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%0),%0 \n" 274733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 274833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 274933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm2 \n" 275033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %4,%%xmm3 \n" 275133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 275233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 275333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 275433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm1 \n" 275533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%1),%1 \n" 275633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 275733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 275833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 275933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 276033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 276133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 276233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 276333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 276433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 
276533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%2),%2 \n" 276633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 10b \n" 276733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 276833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "19: \n" 276933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $1-4,%3 \n" 277033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 49f \n" 277133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "test $0xf,%0 \n" 277233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jne 41f \n" 277333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "test $0xf,%1 \n" 277433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jne 41f \n" 277533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 277633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop. 277733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 277833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "40: \n" 277933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm3 \n" 278033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 278133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 278233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 278333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm2 \n" 278433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %4,%%xmm3 \n" 278533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 278633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 278733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 278833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm1 \n" 278933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 
279033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 279133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 279233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 279333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 279433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 279533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 279633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 279733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 279833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%2) \n" 279933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 280033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 40b \n" 280133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 49f \n" 280233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 280333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel unaligned loop. 
280433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 280533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "41: \n" 280633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm3 \n" 280733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 280833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 280933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 281033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%1),%%xmm2 \n" 281133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %4,%%xmm3 \n" 281233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 281333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 281433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 281533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%1),%%xmm1 \n" 281633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 281733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 281833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 281933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 282033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 282133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 282233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 282333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm1,%%xmm0 \n" 282433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 282533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%2) \n" 282633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 282733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 41b \n" 
282833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 282933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "49: \n" 283033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x3,%3 \n" 283133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 99f \n" 283233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 283333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop. 283433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "91: \n" 283533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm3 \n" 283633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%0),%0 \n" 283733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm0 \n" 283833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm3 \n" 283933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm2 \n" 284033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufb %4,%%xmm3 \n" 284133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm2 \n" 284233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm7,%%xmm3 \n" 284333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm2 \n" 284433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%1),%%xmm1 \n" 284533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%1),%1 \n" 284633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 284733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm4,%%xmm0 \n" 284833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 284933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm2 \n" 285033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb %%xmm2,%%xmm0 \n" 285133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm5,%%xmm1 \n" 285233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddusb 
%%xmm1,%%xmm0 \n" 285333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 285433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 285533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%2),%2 \n" 285633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 91b \n" 285733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "99: \n" 285833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb0), // %0 285933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_argb1), // %1 286033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %2 286133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 286233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "m"(kShuffleAlpha) // %4 286333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 286433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 286533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 286633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 286733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 286833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 286933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBBLENDROW_SSSE3 287033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 287133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBATTENUATE_SSE2 287233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Attenuate 4 pixels at a time. 
// aligned to 16 bytes
//
// Multiplies every channel of each 4-byte pixel by that pixel's alpha (the
// byte at offset 3 of the pixel) and stores the result to dst_argb; the alpha
// byte itself is copied through from the source unchanged.
//
// Per channel c with alpha a the stored value is
//   ((c * 0x101) * (a * 0x101)) >> 24
// (bytes are widened to words by duplicating them, multiplied keeping the
// high 16 bits, then shifted right by 8).
//
// src_argb: source row; read with movdqa, so it must be 16-byte aligned.
// dst_argb: destination row; written with movdqa, so it must be 16-byte
//           aligned.
// width:    number of pixels. The loop handles 4 pixels per pass and tests
//           the remaining count only after a pass, so at least one group of 4
//           is always processed and the row is handled in whole groups of 4.
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
  asm volatile (
    // Turn dst_argb into the offset (dst - src) so that %0 alone can index
    // both rows.
    "sub %0,%1 \n"
    // xmm4 = 0xff000000 in every dword: keeps only the alpha byte.
    "pcmpeqb %%xmm4,%%xmm4 \n"
    "pslld $0x18,%%xmm4 \n"
    // xmm5 = 0x00ffffff in every dword: keeps only the three color bytes.
    "pcmpeqb %%xmm5,%%xmm5 \n"
    "psrld $0x8,%%xmm5 \n"

    // 4 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    // Pixels 0-1: widen each byte to a word holding byte * 0x101.
    "movdqa (%0),%%xmm0 \n"
    "punpcklbw %%xmm0,%%xmm0 \n"
    // Broadcast word 3 (alpha) of each 4-word pixel across that pixel.
    "pshufhw $0xff,%%xmm0,%%xmm2 \n"
    "pshuflw $0xff,%%xmm2,%%xmm2 \n"
    // High 16 bits of channel * alpha.
    "pmulhuw %%xmm2,%%xmm0 \n"
    // Pixels 2-3: same steps on the upper 8 source bytes.
    "movdqa (%0),%%xmm1 \n"
    "punpckhbw %%xmm1,%%xmm1 \n"
    "pshufhw $0xff,%%xmm1,%%xmm2 \n"
    "pshuflw $0xff,%%xmm2,%%xmm2 \n"
    "pmulhuw %%xmm2,%%xmm1 \n"
    // xmm2 = original alpha bytes only.
    "movdqa (%0),%%xmm2 \n"
    "psrlw $0x8,%%xmm0 \n"
    "pand %%xmm4,%%xmm2 \n"
    "psrlw $0x8,%%xmm1 \n"
    // Back to 4 pixels of bytes; drop the multiplied alpha lane and put the
    // original alpha back.
    "packuswb %%xmm1,%%xmm0 \n"
    "pand %%xmm5,%%xmm0 \n"
    "por %%xmm2,%%xmm0 \n"
    // The flags set by this sub are the ones tested by jg below; the movdqa
    // and lea in between do not modify flags.
    "sub $0x4,%2 \n"
    // (%0,%1,1) == src + (dst - src) == current destination address.
    "movdqa %%xmm0,(%0,%1,1) \n"
    "lea 0x10(%0),%0 \n"
    "jg 1b \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_argb),  // %1
    "+r"(width)      // %2
  :
  : "memory", "cc"
#if defined(__SSE2__)
    // NOTE(review): xmm3 is listed here but is not referenced by the asm
    // above; over-declaring a clobber is harmless.
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
  );
}
#endif  // HAS_ARGBATTENUATE_SSE2

#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha
// pshufb control bytes. Index 3 / 7 (kShuffleAlpha0) and 11 / 15
// (kShuffleAlpha1) pick the byte at offset 3 of pixels 0 / 1 and 2 / 3, so
// each pixel's alpha fills both bytes of its first three words. 128 has the
// high bit set, which makes pshufb write a zero byte, so the fourth word of
// each pixel (the alpha lane) becomes 0.
CONST uvec8 kShuffleAlpha0 = {
  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
};
CONST uvec8 kShuffleAlpha1 = {
  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
};
// Attenuate 4 pixels at a time.
// aligned to 16 bytes
//
// Multiplies the first three bytes of each 4-byte pixel by that pixel's alpha
// (byte 3) and stores the result to dst_argb with the source alpha byte
// copied through. Per channel c with alpha a the stored value is
//   ((a * 0x101) * (c * 0x101)) >> 24.
//
// src_argb: source row; read with movdqa, so it must be 16-byte aligned.
// dst_argb: destination row; written with movdqa, so it must be 16-byte
//           aligned.
// width:    number of pixels. 4 pixels are handled per pass and the count is
//           tested only after a pass, so the row is processed in whole groups
//           of 4 and at least one group is always processed.
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
  asm volatile (
    // Turn dst_argb into the offset (dst - src) so that %0 indexes both rows.
    "sub %0,%1 \n"
    // xmm3 = 0xff000000 in every dword: keeps only the alpha byte.
    "pcmpeqb %%xmm3,%%xmm3 \n"
    "pslld $0x18,%%xmm3 \n"
    // xmm4 / xmm5 = alpha shuffle controls for pixels 0-1 / 2-3.
    "movdqa %3,%%xmm4 \n"
    "movdqa %4,%%xmm5 \n"

    // 4 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    // xmm0 = per-pixel alpha words for pixels 0-1 (alpha lane zeroed).
    "movdqa (%0),%%xmm0 \n"
    "pshufb %%xmm4,%%xmm0 \n"
    // xmm1 = pixels 0-1 widened to words holding byte * 0x101.
    "movdqa (%0),%%xmm1 \n"
    "punpcklbw %%xmm1,%%xmm1 \n"
    // High 16 bits of alpha * channel; the alpha lane is multiplied by 0.
    "pmulhuw %%xmm1,%%xmm0 \n"
    // Same for pixels 2-3.
    "movdqa (%0),%%xmm1 \n"
    "pshufb %%xmm5,%%xmm1 \n"
    "movdqa (%0),%%xmm2 \n"
    "punpckhbw %%xmm2,%%xmm2 \n"
    "pmulhuw %%xmm2,%%xmm1 \n"
    // xmm2 = original alpha bytes only.
    "movdqa (%0),%%xmm2 \n"
    "pand %%xmm3,%%xmm2 \n"
    "psrlw $0x8,%%xmm0 \n"
    "psrlw $0x8,%%xmm1 \n"
    // Back to bytes. The alpha lane is already 0, so OR-ing restores the
    // original alpha without a separate mask step.
    "packuswb %%xmm1,%%xmm0 \n"
    "por %%xmm2,%%xmm0 \n"
    // Flags from this sub are tested by jg below; movdqa and lea leave the
    // flags untouched.
    "sub $0x4,%2 \n"
    "movdqa %%xmm0,(%0,%1,1) \n"
    "lea 0x10(%0),%0 \n"
    "jg 1b \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_argb),  // %1
    "+r"(width)      // %2
  : "m"(kShuffleAlpha0),  // %3
    "m"(kShuffleAlpha1)  // %4
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
  );
}
#endif  // HAS_ARGBATTENUATEROW_SSSE3

#ifdef HAS_ARGBUNATTENUATEROW_SSE2
// Unattenuate 4 pixels at a time.
// aligned to 16 bytes
//
// For each 4-byte pixel, looks up a 32-bit entry of fixed_invtbl8 indexed by
// the pixel's alpha byte (offset 3), multiplies the first three bytes of the
// pixel (widened to byte * 0x101) by the low 16 bits of that entry keeping
// the high 16 bits of each product, saturates to 8 bits and stores the
// result with the source alpha byte copied through.
//
// NOTE(review): fixed_invtbl8 is defined outside this part of the file. The
// addressing mode (%4,%3,4) treats it as an array of 4-byte entries indexed
// by alpha; presumably the entries are fixed-point reciprocals of alpha --
// confirm against its definition.
//
// src_argb: source row; read with movdqa, so it must be 16-byte aligned.
// dst_argb: destination row; written with movdqa, so it must be 16-byte
//           aligned.
// width:    number of pixels. 4 pixels are handled per pass and the count is
//           tested only after a pass, so the row is processed in whole groups
//           of 4 and at least one group is always processed.
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
                             int width) {
  // Scratch general-purpose register that receives each alpha byte for use
  // as the table index.
  uintptr_t alpha = 0;
  asm volatile (
    // Turn dst_argb into the offset (dst - src) so that %0 indexes both rows.
    "sub %0,%1 \n"
    // xmm4 = 0xff000000 in every dword: keeps only the alpha byte.
    "pcmpeqb %%xmm4,%%xmm4 \n"
    "pslld $0x18,%%xmm4 \n"

    // 4 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    "movdqa (%0),%%xmm0 \n"
    // Alpha of pixel 0 -> table entry in xmm2.
    "movzb 0x3(%0),%3 \n"
    "punpcklbw %%xmm0,%%xmm0 \n"
    "movd 0x0(%4,%3,4),%%xmm2 \n"
    // Alpha of pixel 1 -> table entry in xmm3.
    "movzb 0x7(%0),%3 \n"
    "movd 0x0(%4,%3,4),%%xmm3 \n"
    // 0xc0 copies word 0 into words 0-2 and keeps word 3, which is 0 after
    // the movd load; the alpha lane is therefore multiplied by 0.
    "pshuflw $0xc0,%%xmm2,%%xmm2 \n"
    "pshuflw $0xc0,%%xmm3,%%xmm3 \n"
    // Pixel 1's multipliers go into the upper half next to pixel 0's.
    "movlhps %%xmm3,%%xmm2 \n"
    "pmulhuw %%xmm2,%%xmm0 \n"
    // Same for pixels 2-3 (alpha bytes at offsets 0xb and 0xf).
    "movdqa (%0),%%xmm1 \n"
    "movzb 0xb(%0),%3 \n"
    "punpckhbw %%xmm1,%%xmm1 \n"
    "movd 0x0(%4,%3,4),%%xmm2 \n"
    "movzb 0xf(%0),%3 \n"
    "movd 0x0(%4,%3,4),%%xmm3 \n"
    "pshuflw $0xc0,%%xmm2,%%xmm2 \n"
    "pshuflw $0xc0,%%xmm3,%%xmm3 \n"
    "movlhps %%xmm3,%%xmm2 \n"
    "pmulhuw %%xmm2,%%xmm1 \n"
    // xmm2 = original alpha bytes only.
    "movdqa (%0),%%xmm2 \n"
    "pand %%xmm4,%%xmm2 \n"
    // Saturate the products to bytes, then restore the original alpha into
    // the zeroed alpha lane.
    "packuswb %%xmm1,%%xmm0 \n"
    "por %%xmm2,%%xmm0 \n"
    // Flags from this sub are tested by jg below; movdqa and lea leave the
    // flags untouched.
    "sub $0x4,%2 \n"
    "movdqa %%xmm0,(%0,%1,1) \n"
    "lea 0x10(%0),%0 \n"
    "jg 1b \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_argb),  // %1
    "+r"(width),     // %2
    "+r"(alpha)      // %3
  : "r"(fixed_invtbl8)  // %4
  : "memory", "cc"
#if defined(__SSE2__)
    // NOTE(review): xmm5 is listed here but is not referenced by the asm
    // above; over-declaring a clobber is harmless.
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
  );
}
#endif  // HAS_ARGBUNATTENUATEROW_SSE2

#ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale.
// 0.11 * B + 0.59 * G + 0.30 * R
// The weights are scaled by 128 (14 + 76 + 38 = 128) to match the >> 7 in
// ARGBGrayRow_SSSE3; the fourth coefficient of each pixel is 0, so the alpha
// byte does not contribute to the gray value.
CONST vec8 kARGBToGray = {
  14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};

// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
//
// For each pixel computes
//   gray = (byte0 * 14 + byte1 * 76 + byte2 * 38) >> 7
// and writes gray to bytes 0-2 of the destination pixel, with byte 3 (alpha)
// copied from the source.
//
// src_argb: source row; read with movdqa, so it must be 16-byte aligned.
// dst_argb: destination row; written with movdqa, so it must be 16-byte
//           aligned.
// width:    number of pixels. 8 pixels are handled per pass and the count is
//           tested only after a pass, so the row is processed in whole groups
//           of 8 and at least one group is always processed.
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
  asm volatile (
    // xmm4 = gray coefficients.
    "movdqa %3,%%xmm4 \n"
    // Turn dst_argb into the offset (dst - src) so that %0 indexes both rows.
    "sub %0,%1 \n"

    // 8 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    "movdqa (%0),%%xmm0 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    // Unsigned pixel bytes times signed coefficients, summed in pairs; the
    // horizontal add then yields one weighted sum per pixel.
    "pmaddubsw %%xmm4,%%xmm0 \n"
    "pmaddubsw %%xmm4,%%xmm1 \n"
    "phaddw %%xmm1,%%xmm0 \n"
    "psrlw $0x7,%%xmm0 \n"
    // xmm0 = 8 gray bytes (repeated in both halves).
    "packuswb %%xmm0,%%xmm0 \n"
    // Extract the 8 alpha bytes: move byte 3 of each dword to the bottom,
    // then pack twice down to bytes.
    "movdqa (%0),%%xmm2 \n"
    "movdqa 0x10(%0),%%xmm3 \n"
    "psrld $0x18,%%xmm2 \n"
    "psrld $0x18,%%xmm3 \n"
    "packuswb %%xmm3,%%xmm2 \n"
    "packuswb %%xmm2,%%xmm2 \n"
    // Interleave into gray,gray and gray,alpha byte pairs, then into
    // gray,gray,gray,alpha pixels.
    "movdqa %%xmm0,%%xmm3 \n"
    "punpcklbw %%xmm0,%%xmm0 \n"
    "punpcklbw %%xmm2,%%xmm3 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "punpcklwd %%xmm3,%%xmm0 \n"
    "punpckhwd %%xmm3,%%xmm1 \n"
    // Flags from this sub are tested by jg below; movdqa and lea leave the
    // flags untouched.
    "sub $0x8,%2 \n"
    "movdqa %%xmm0,(%0,%1,1) \n"
    "movdqa %%xmm1,0x10(%0,%1,1) \n"
    "lea 0x20(%0),%0 \n"
    "jg 1b \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_argb),  // %1
    "+r"(width)      // %2
  : "m"(kARGBToGray)  // %3
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
#endif
  );
}
#endif  // HAS_ARGBGRAYROW_SSSE3

#ifdef HAS_ARGBSEPIAROW_SSSE3
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone
// Each table repeats one row of coefficients in the order b, g, r, a (the
// alpha coefficient is 0), once per pixel of a 16-byte register.
CONST vec8 kARGBToSepiaB = {
  17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
};

CONST vec8 kARGBToSepiaG = {
  22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
};

CONST vec8 kARGBToSepiaR = {
  24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
};

// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
// Works in place: each pixel of dst_argb is read, its first three bytes are
// replaced by the weighted sums given by kARGBToSepiaB / kARGBToSepiaG /
// kARGBToSepiaR (each shifted right by 7 and saturated to 8 bits), and its
// alpha byte (offset 3) is written back unchanged.
//
// dst_argb: row to transform; read and written with movdqa, so it must be
//           16-byte aligned.
// width:    number of pixels. 8 pixels are handled per pass and the count is
//           tested only after a pass, so the row is processed in whole groups
//           of 8 and at least one group is always processed.
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
  asm volatile (
    // xmm2 / xmm3 / xmm4 = coefficients for output bytes 0 / 1 / 2.
    "movdqa %2,%%xmm2 \n"
    "movdqa %3,%%xmm3 \n"
    "movdqa %4,%%xmm4 \n"

    // 8 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    // Output byte 0 for 8 pixels -> xmm0.
    "movdqa (%0),%%xmm0 \n"
    "movdqa 0x10(%0),%%xmm6 \n"
    "pmaddubsw %%xmm2,%%xmm0 \n"
    "pmaddubsw %%xmm2,%%xmm6 \n"
    "phaddw %%xmm6,%%xmm0 \n"
    "psrlw $0x7,%%xmm0 \n"
    "packuswb %%xmm0,%%xmm0 \n"
    // Output byte 1 for 8 pixels -> xmm5.
    "movdqa (%0),%%xmm5 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "pmaddubsw %%xmm3,%%xmm5 \n"
    "pmaddubsw %%xmm3,%%xmm1 \n"
    "phaddw %%xmm1,%%xmm5 \n"
    "psrlw $0x7,%%xmm5 \n"
    "packuswb %%xmm5,%%xmm5 \n"
    // xmm0 = (byte 0, byte 1) pairs.
    "punpcklbw %%xmm5,%%xmm0 \n"
    // Output byte 2 for 8 pixels -> xmm5.
    "movdqa (%0),%%xmm5 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "pmaddubsw %%xmm4,%%xmm5 \n"
    "pmaddubsw %%xmm4,%%xmm1 \n"
    "phaddw %%xmm1,%%xmm5 \n"
    "psrlw $0x7,%%xmm5 \n"
    "packuswb %%xmm5,%%xmm5 \n"
    // Original alpha bytes for 8 pixels -> xmm6.
    "movdqa (%0),%%xmm6 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "psrld $0x18,%%xmm6 \n"
    "psrld $0x18,%%xmm1 \n"
    "packuswb %%xmm1,%%xmm6 \n"
    "packuswb %%xmm6,%%xmm6 \n"
    // xmm5 = (byte 2, alpha) pairs; interleave with xmm0 into full pixels.
    "punpcklbw %%xmm6,%%xmm5 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "punpcklwd %%xmm5,%%xmm0 \n"
    "punpckhwd %%xmm5,%%xmm1 \n"
    // Flags from this sub are tested by jg below; movdqa and lea leave the
    // flags untouched.
    "sub $0x8,%1 \n"
    "movdqa %%xmm0,(%0) \n"
    "movdqa %%xmm1,0x10(%0) \n"
    "lea 0x20(%0),%0 \n"
    "jg 1b \n"
  : "+r"(dst_argb),  // %0
    "+r"(width)      // %1
  : "m"(kARGBToSepiaB),  // %2
    "m"(kARGBToSepiaG),  // %3
    "m"(kARGBToSepiaR)   // %4
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
  );
}
#endif  // HAS_ARGBSEPIAROW_SSSE3

#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) with color matrix.
// Same as Sepia except matrix is provided.
// Works in place: output byte k (k = 0, 1, 2) of each pixel is the sum of the
// pixel's four bytes weighted by coefficient group k of matrix_argb, shifted
// right arithmetically by 7 and clamped to 0..255; the alpha byte (offset 3)
// is written back unchanged.
//
// dst_argb:    row to transform; read and written with movdqa, so it must be
//              16-byte aligned.
// matrix_argb: signed 8-bit coefficients. 12 bytes are read: three groups of
//              four at offsets 0, 4 and 8, each broadcast to all four pixels
//              of a register. Coefficient j of a group multiplies byte j of
//              the pixel.
// width:       number of pixels. 8 pixels are handled per pass and the count
//              is tested only after a pass, so the row is processed in whole
//              groups of 8 and at least one group is always processed.
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
                              int width) {
  asm volatile (
    // xmm2 / xmm3 / xmm4 = coefficient groups 0 / 1 / 2, replicated into
    // every dword.
    "movd (%2),%%xmm2 \n"
    "movd 0x4(%2),%%xmm3 \n"
    "movd 0x8(%2),%%xmm4 \n"
    "pshufd $0x0,%%xmm2,%%xmm2 \n"
    "pshufd $0x0,%%xmm3,%%xmm3 \n"
    "pshufd $0x0,%%xmm4,%%xmm4 \n"

    // 8 pixel loop.
    ".p2align 4 \n"
  "1: \n"
    // Weighted sums for output bytes 0 (xmm0) and 1 (xmm5). The saturating
    // horizontal add and arithmetic shift are used because the coefficients
    // are signed and sums can be negative.
    "movdqa (%0),%%xmm0 \n"
    "movdqa 0x10(%0),%%xmm6 \n"
    "pmaddubsw %%xmm2,%%xmm0 \n"
    "pmaddubsw %%xmm2,%%xmm6 \n"
    "movdqa (%0),%%xmm5 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "pmaddubsw %%xmm3,%%xmm5 \n"
    "pmaddubsw %%xmm3,%%xmm1 \n"
    "phaddsw %%xmm6,%%xmm0 \n"
    "phaddsw %%xmm1,%%xmm5 \n"
    "psraw $0x7,%%xmm0 \n"
    "psraw $0x7,%%xmm5 \n"
    // packuswb clamps each signed word to 0..255.
    "packuswb %%xmm0,%%xmm0 \n"
    "packuswb %%xmm5,%%xmm5 \n"
    // xmm0 = (byte 0, byte 1) pairs.
    "punpcklbw %%xmm5,%%xmm0 \n"
    // Weighted sum for output byte 2 -> xmm5.
    "movdqa (%0),%%xmm5 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "pmaddubsw %%xmm4,%%xmm5 \n"
    "pmaddubsw %%xmm4,%%xmm1 \n"
    "phaddsw %%xmm1,%%xmm5 \n"
    "psraw $0x7,%%xmm5 \n"
    "packuswb %%xmm5,%%xmm5 \n"
    // Original alpha bytes for 8 pixels -> xmm6.
    "movdqa (%0),%%xmm6 \n"
    "movdqa 0x10(%0),%%xmm1 \n"
    "psrld $0x18,%%xmm6 \n"
    "psrld $0x18,%%xmm1 \n"
    "packuswb %%xmm1,%%xmm6 \n"
    "packuswb %%xmm6,%%xmm6 \n"
    // xmm5 = (byte 2, alpha) pairs; interleave with xmm0 into full pixels.
    "movdqa %%xmm0,%%xmm1 \n"
    "punpcklbw %%xmm6,%%xmm5 \n"
    "punpcklwd %%xmm5,%%xmm0 \n"
    "punpckhwd %%xmm5,%%xmm1 \n"
    // Flags from this sub are tested by jg below; movdqa and lea leave the
    // flags untouched.
    "sub $0x8,%1 \n"
    "movdqa %%xmm0,(%0) \n"
    "movdqa %%xmm1,0x10(%0) \n"
    "lea 0x20(%0),%0 \n"
    "jg 1b \n"
  : "+r"(dst_argb),  // %0
    "+r"(width)      // %1
  : "r"(matrix_argb)  // %2
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
  );
}
#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3

#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
322233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// aligned to 16 bytes 322333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, 322433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int interval_offset, int width) { 322533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 322633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %2,%%xmm2 \n" 322733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm3 \n" 322833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %4,%%xmm4 \n" 322933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0x40,%%xmm2,%%xmm2 \n" 323033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x44,%%xmm2,%%xmm2 \n" 323133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0x40,%%xmm3,%%xmm3 \n" 323233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x44,%%xmm3,%%xmm3 \n" 323333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshuflw $0x40,%%xmm4,%%xmm4 \n" 323433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x44,%%xmm4,%%xmm4 \n" 323533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm5,%%xmm5 \n" 323633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pcmpeqb %%xmm6,%%xmm6 \n" 323733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pslld $0x18,%%xmm6 \n" 323833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 323933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop. 
324033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 324133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 324233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 324333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm0 \n" 324433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm2,%%xmm0 \n" 324533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm1 \n" 324633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm5,%%xmm1 \n" 324733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm2,%%xmm1 \n" 324833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm0 \n" 324933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm7 \n" 325033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmullw %%xmm3,%%xmm1 \n" 325133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pand %%xmm6,%%xmm7 \n" 325233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm4,%%xmm0 \n" 325333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm4,%%xmm1 \n" 325433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 325533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "por %%xmm7,%%xmm0 \n" 325633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%1 \n" 325733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0) \n" 325833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 325933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 326033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(dst_argb), // %0 326133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %1 326233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(scale), // %2 326333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"r"(interval_size), // %3 326433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(interval_offset) // %4 326533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 326633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 326733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 32687cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 326933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 327033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 327133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBQUANTIZEROW_SSE2 327233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 327333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 327433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Creates a table of cumulative sums where each value is a sum of all values 327533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// above and to the left of the value, inclusive of the value. 
327633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, 327733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const int32* previous_cumsum, int width) { 327833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 327933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%2 \n" 328033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm0,%%xmm0 \n" 328133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm1,%%xmm1 \n" 328233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 328333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 49f \n" 328433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "test $0xf,%1 \n" 328533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jne 49f \n" 328633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 328733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop \n" 328833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 328933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "40: \n" 329033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%0),%%xmm2 \n" 329133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 329233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm4 \n" 329333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm2 \n" 329433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 329533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm1,%%xmm2 \n" 329633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm3 \n" 329733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm1,%%xmm4 \n" 329833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm5 \n" 329933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"punpcklwd %%xmm1,%%xmm4 \n" 330033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm1,%%xmm5 \n" 330133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm2,%%xmm0 \n" 330233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1,%2,1),%%xmm2 \n" 330333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm0,%%xmm2 \n" 330433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm3,%%xmm0 \n" 330533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%1,%2,1),%%xmm3 \n" 330633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm0,%%xmm3 \n" 330733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm4,%%xmm0 \n" 330833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%1,%2,1),%%xmm4 \n" 330933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm0,%%xmm4 \n" 331033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm5,%%xmm0 \n" 331133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x30(%1,%2,1),%%xmm5 \n" 331233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm0,%%xmm5 \n" 331333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,(%1) \n" 331433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,0x10(%1) \n" 331533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,0x20(%1) \n" 331633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,0x30(%1) \n" 331733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%1),%1 \n" 331833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 331933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 40b \n" 332033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 332133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "49: \n" 332233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x3,%3 \n" 
332333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 19f \n" 332433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 332533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop \n" 332633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 332733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "10: \n" 332833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm2 \n" 332933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%0),%0 \n" 333033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm2 \n" 333133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm1,%%xmm2 \n" 333233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm2,%%xmm0 \n" 333333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu (%1,%2,1),%%xmm2 \n" 333433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd %%xmm0,%%xmm2 \n" 333533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm2,(%1) \n" 333633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 333733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 333833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 10b \n" 33397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 334033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "19: \n" 334133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(row), // %0 334233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(cumsum), // %1 334333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(previous_cumsum), // %2 334433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 334533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 334633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 334733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 
334833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 334933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 335033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 335133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 335233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 335333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 335433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2 335533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, 335633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width, int area, uint8* dst, int count) { 335733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 335833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %5,%%xmm4 \n" 335933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm4,%%xmm4 \n" 336033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "rcpss %%xmm4,%%xmm4 \n" 336133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm4,%%xmm4 \n" 336233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 336333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 49f \n" 336433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 336533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop \n" 336633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 336733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "40: \n" 336833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 336933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 337033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x20(%0),%%xmm2 \n" 337133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"movdqa 0x30(%0),%%xmm3 \n" 337233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd (%0,%4,4),%%xmm0 \n" 337333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x10(%0,%4,4),%%xmm1 \n" 337433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x20(%0,%4,4),%%xmm2 \n" 337533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x30(%0,%4,4),%%xmm3 \n" 337633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%0),%0 \n" 337733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd (%1),%%xmm0 \n" 337833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x10(%1),%%xmm1 \n" 337933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x20(%1),%%xmm2 \n" 338033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd 0x30(%1),%%xmm3 \n" 338133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd (%1,%4,4),%%xmm0 \n" 338233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd 0x10(%1,%4,4),%%xmm1 \n" 338333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd 0x20(%1,%4,4),%%xmm2 \n" 338433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd 0x30(%1,%4,4),%%xmm3 \n" 338533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x40(%1),%1 \n" 338633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm0,%%xmm0 \n" 338733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm1,%%xmm1 \n" 338833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mulps %%xmm4,%%xmm0 \n" 338933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mulps %%xmm4,%%xmm1 \n" 339033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm2,%%xmm2 \n" 339133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm3,%%xmm3 \n" 339233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mulps %%xmm4,%%xmm2 \n" 339333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mulps %%xmm4,%%xmm3 \n" 
339433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtps2dq %%xmm0,%%xmm0 \n" 339533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtps2dq %%xmm1,%%xmm1 \n" 339633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtps2dq %%xmm2,%%xmm2 \n" 339733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtps2dq %%xmm3,%%xmm3 \n" 339833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm1,%%xmm0 \n" 339933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm3,%%xmm2 \n" 340033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm2,%%xmm0 \n" 340133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqu %%xmm0,(%2) \n" 340233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 340333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 340433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 40b \n" 340533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 340633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "49: \n" 340733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x3,%3 \n" 340833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 19f \n" 340933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 341033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop \n" 341133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 341233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "10: \n" 341333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 341433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd (%0,%4,4),%%xmm0 \n" 341533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 341633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubd (%1),%%xmm0 \n" 341733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddd (%1,%4,4),%%xmm0 \n" 341833cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "lea 0x10(%1),%1 \n" 341933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtdq2ps %%xmm0,%%xmm0 \n" 342033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mulps %%xmm4,%%xmm0 \n" 342133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvtps2dq %%xmm0,%%xmm0 \n" 342233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm0,%%xmm0 \n" 342333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm0,%%xmm0 \n" 342433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 342533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%2),%2 \n" 342633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%3 \n" 342733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 10b \n" 342833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "19: \n" 342933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(topleft), // %0 343033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(botleft), // %1 343133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %2 343233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(count) // %3 343333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(width)), // %4 343433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "rm"(area) // %5 343533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 343633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 343733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" 343833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 343933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 344033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 344133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2 344233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef 
HAS_ARGBSHADE_SSE2 344333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Shade 4 pixels at a time by specified value. 344433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Aligned to 16 bytes. 344533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, 344633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32 value) { 344733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 344833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm2 \n" 344933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %0,%1 \n" 345033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm2 \n" 345133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklqdq %%xmm2,%%xmm2 \n" 345233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 345333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop. 345433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 345533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 345633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 345733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 345833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm0 \n" 345933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm1,%%xmm1 \n" 346033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm2,%%xmm0 \n" 346133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhuw %%xmm2,%%xmm1 \n" 346233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm0 \n" 346333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x8,%%xmm1 \n" 346433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 346533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 
346633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%0,%1,1) \n" 346733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0),%0 \n" 346833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 346933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 347033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 347133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %2 347233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(value) // %3 347333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 347433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 347533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2" 347633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 347733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 347833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 347933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBSHADE_SSE2 348033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 348133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBAFFINEROW_SSE2 348233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// TODO(fbarchard): Find 64 bit way to avoid masking. 348333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// TODO(fbarchard): Investigate why 4 pixels is slower than 2 on Core2. 348433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Copy ARGB pixels from source image with slope to a row of destination. 348533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Caveat - in 64 bit, movd is used with 64 bit gpr due to Mac gcc producing 348633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// an error if movq is used. 
movd %%xmm0,%1 348733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 348833cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 348933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, 349033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_argb, const float* uv_dudv, int width) { 349133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t src_argb_stride_temp = src_argb_stride; 349233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t temp = 0; 349333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 349433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%3),%%xmm2 \n" 349533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq 0x8(%3),%%xmm7 \n" 349633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shl $0x10,%1 \n" 349733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x4,%1 \n" 349833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %1,%%xmm5 \n" 349933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%4 \n" 350033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 49f \n" 350133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 350233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x44,%%xmm7,%%xmm7 \n" 350333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 350433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm0 \n" 350533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm7,%%xmm0 \n" 350633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlhps %%xmm0,%%xmm2 \n" 350733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm7,%%xmm4 \n" 350833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm4,%%xmm4 \n" 350933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 
351033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm4,%%xmm3 \n" 351133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm4,%%xmm4 \n" 351233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 351333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 4 pixel loop \n" 351433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 351533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "40: \n" 351633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvttps2dq %%xmm2,%%xmm0 \n" 351733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvttps2dq %%xmm3,%%xmm1 \n" 351833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm1,%%xmm0 \n" 351933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddwd %%xmm5,%%xmm0 \n" 352033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__x86_64__) 352133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%1 \n" 352233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov %1,%5 \n" 352333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "and $0x0fffffff,%1 \n" 352433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr $32,%5 \n" 352533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0xEE,%%xmm0,%%xmm0 \n" 352633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#else 352733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%1 \n" 352833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x39,%%xmm0,%%xmm0 \n" 352933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%5 \n" 353033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x39,%%xmm0,%%xmm0 \n" 353133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 353233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%1,1),%%xmm1 \n" 353333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%5,1),%%xmm6 \n" 
353433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm6,%%xmm1 \n" 353533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm4,%%xmm2 \n" 353633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%2) \n" 353733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__x86_64__) 353833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%1 \n" 353933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov %1,%5 \n" 354033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "and $0x0fffffff,%1 \n" 354133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr $32,%5 \n" 354233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#else 354333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%1 \n" 354433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x39,%%xmm0,%%xmm0 \n" 354533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%5 \n" 354633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 354733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%1,1),%%xmm0 \n" 354833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%5,1),%%xmm6 \n" 354933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm6,%%xmm0 \n" 355033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm4,%%xmm3 \n" 355133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%4 \n" 355233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,0x08(%2) \n" 355333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 355433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 40b \n" 355533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 355633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "49: \n" 355733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x3,%4 \n" 355833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jl 
19f \n" 355933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 356033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // 1 pixel loop \n" 356133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 356233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "10: \n" 356333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cvttps2dq %%xmm2,%%xmm0 \n" 356433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packssdw %%xmm0,%%xmm0 \n" 356533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddwd %%xmm5,%%xmm0 \n" 356633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "addps %%xmm7,%%xmm2 \n" 356733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,%1 \n" 356833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__x86_64__) 356933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "and $0x0fffffff,%1 \n" 357033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 357133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%1,1),%%xmm0 \n" 357233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x1,%4 \n" 357333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %%xmm0,(%2) \n" 357433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x4(%2),%2 \n" 357533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jge 10b \n" 357633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "19: \n" 357733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 357833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_argb_stride_temp), // %1 357933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %2 358033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(uv_dudv), // %3 358133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(width), // %4 358233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(temp) // %5 358333cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp : 358433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 358533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 358633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 358733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 358833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 358933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 359033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBAFFINEROW_SSE2 359133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 359233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version 359333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 359433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int dst_width, 359533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int source_y_fraction) { 359633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 359733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%0 \n" 359833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr %3 \n" 359933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x0,%3 \n" 360033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 2f \n" 360133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x40,%3 \n" 360233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 3f \n" 360333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm0 \n" 360433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 360533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x80,%3 \n" 360633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm5 \n" 360733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw 
%%xmm0,%%xmm5 \n" 360833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm5,%%xmm5 \n" 360933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 361033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 361133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 361233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 361333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1,%4,1),%%xmm2 \n" 361433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 361533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm0 \n" 361633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm2,%%xmm1 \n" 361733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm5,%%xmm0 \n" 361833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm5,%%xmm1 \n" 361933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 362033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm1 \n" 362133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 362233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 362333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 362433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 362533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 362633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 362733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 362833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "2: \n" 362933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 363033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 363133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 
%%xmm0,(%1,%0,1) \n" 363233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 363333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 2b \n" 363433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 363533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 363633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "3: \n" 363733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 363833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%1,%4,1),%%xmm0 \n" 363933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 364033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 364133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 364233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 3b \n" 364333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "4: \n" 364433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 364533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(dst_ptr), // %0 364633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_ptr), // %1 364733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width), // %2 364833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(source_y_fraction) // %3 364933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride)) // %4 365033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 365133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 365233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm5" 365333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 365433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 365533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 365633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
365733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // defined(__x86_64__) || defined(__i386__) 365833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 365933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 36607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} // extern "C" 366133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // namespace libyuv 366233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 3663