15327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org/* 2b0c97975894a5eebebf9d93147cdd941a3accb63fbarchard@google.com * Copyright 2011 The LibYuv Project Authors. All rights reserved. 35327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org * 45327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org * Use of this source code is governed by a BSD-style license 55327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org * that can be found in the LICENSE file in the root of the source 65327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org * tree. An additional intellectual property rights grant can be found 7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com * in the file PATENTS. All contributing project authors may 85327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org * be found in the AUTHORS file in the root of the source tree. 95327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org */ 105327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org 11142f6c4ed5eaeec0176f255e64bac8d8c70b42e1fbarchard@google.com#include "libyuv/row.h" 125327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org 13e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#if defined (_M_X64) 14e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#include <emmintrin.h> 15e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#include <tmmintrin.h> // For _mm_maddubs_epi16 16e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#endif 17e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 18fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus 19fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comnamespace libyuv { 205327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.orgextern "C" { 21fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif 225327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org 23e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// This module is for Visual 
C. 24e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) 25e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 26e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define YG 74 /* (int8)(1.164 * 64 + 0.5) */ 27e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 28e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define UB 127 /* min(127,(int8)(2.018 * 64)) */ 29e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ 30e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define UR 0 31e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 32e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define VB 0 33e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ 34e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define VR 102 /* (int8)(1.596 * 64 + 0.5) */ 35e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 36e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// Bias 37e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define BB UB * 128 + VB * 128 38e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define BG UG * 128 + VG * 128 39e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#define BR UR * 128 + VR * 128 40e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 41e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kUVToB = { 42e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB 43e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 44e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 45e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kUVToR = { 46e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com UR, VR, UR, VR, UR, VR, 
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR 47e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 48e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 49e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kUVToG = { 50e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG 51e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 52e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 53e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kVUToB = { 54e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, 55e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 56e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 57e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kVUToR = { 58e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, 59e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 60e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 61e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec8 kVUToG = { 62e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, 63e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com}; 64e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 65e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG }; 66e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 }; 67e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB }; 
68e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG }; 69e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comstatic const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; 70e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 71e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// 64 bit 72e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#if defined(_M_X64) 73e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 74e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// Aligned destination version. 75e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com__declspec(align(16)) 76e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comvoid I422ToARGBRow_SSSE3(const uint8* y_buf, 77e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* u_buf, 78e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* v_buf, 79e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com uint8* dst_argb, 80e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com int width) { 81e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 82e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com __m128i xmm0, xmm1, xmm2, xmm3; 83e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const __m128i xmm5 = _mm_set1_epi8(-1); 84e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const __m128i xmm4 = _mm_setzero_si128(); 85e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 86e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 87e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com while (width > 0) { 88e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); 89e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); 
90e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); 91e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); 92e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_load_si128(&xmm0); 93e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_load_si128(&xmm0); 94e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB); 95e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG); 96e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR); 97e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB); 98e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG); 99e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR); 100e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_loadl_epi64((__m128i*)y_buf); 101e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_unpacklo_epi8(xmm3, xmm4); 102e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16); 103e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb); 104e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_adds_epi16(xmm0, xmm3); 105e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_adds_epi16(xmm1, xmm3); 106e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_adds_epi16(xmm2, xmm3); 107e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_srai_epi16(xmm0, 6); 108e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_srai_epi16(xmm1, 6); 
109e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_srai_epi16(xmm2, 6); 110e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_packus_epi16(xmm0, xmm0); 111e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_packus_epi16(xmm1, xmm1); 112e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_packus_epi16(xmm2, xmm2); 113e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); 114e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); 115e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_load_si128(&xmm0); 116e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); 117e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); 118e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 119e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com _mm_store_si128((__m128i *)dst_argb, xmm0); 120e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com _mm_store_si128((__m128i *)(dst_argb + 16), xmm1); 121e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 122e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com y_buf += 8; 123e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com u_buf += 4; 124e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com dst_argb += 32; 125e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com width -= 8; 126e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com } 127e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com} 128e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 129e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// Unaligned destination version. 
130e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.comvoid I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 131e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* u_buf, 132e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* v_buf, 133e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com uint8* dst_argb, 134e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com int width) { 135e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 136e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com __m128i xmm0, xmm1, xmm2, xmm3; 137e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const __m128i xmm5 = _mm_set1_epi8(-1); 138e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const __m128i xmm4 = _mm_setzero_si128(); 139e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 140e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 141e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com while (width > 0) { 142e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); 143e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); 144e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); 145e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); 146e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_load_si128(&xmm0); 147e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_load_si128(&xmm0); 148e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB); 149e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG); 150e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 
= _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR); 151e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB); 152e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG); 153e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR); 154e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_loadl_epi64((__m128i*)y_buf); 155e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_unpacklo_epi8(xmm3, xmm4); 156e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16); 157e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb); 158e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_adds_epi16(xmm0, xmm3); 159e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_adds_epi16(xmm1, xmm3); 160e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_adds_epi16(xmm2, xmm3); 161e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_srai_epi16(xmm0, 6); 162e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_srai_epi16(xmm1, 6); 163e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_srai_epi16(xmm2, 6); 164e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_packus_epi16(xmm0, xmm0); 165e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_packus_epi16(xmm1, xmm1); 166e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_packus_epi16(xmm2, xmm2); 167e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); 168e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); 169e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = 
_mm_load_si128(&xmm0); 170e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); 171e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); 172e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 173e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com _mm_storeu_si128((__m128i *)dst_argb, xmm0); 174e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); 175e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com 176e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com y_buf += 8; 177e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com u_buf += 4; 178e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com dst_argb += 32; 179e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com width -= 8; 180e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com } 181e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com} 182e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com// 32 bit 183e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#else // defined(_M_X64) 1842d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 185585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com#ifdef HAS_ARGBTOYROW_SSSE3 186585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 187c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Constants for ARGB. 188851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToY = { 189585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 190585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com}; 191585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 1924e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com// JPeg full range. 
193851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToYJ = { 194050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0 1954e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com}; 1964e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com 197851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToU = { 198585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 199585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com}; 200585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 201851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToUJ = { 202050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0 203050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com}; 204050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 205851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToV = { 206585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, 207585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com}; 208585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 209851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToVJ = { 210050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0 211050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com}; 212050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 213caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com// vpermd for vphaddw + vpackuswb vpermd. 
214446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.comstatic const lvec32 kPermdARGBToY_AVX = { 215551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 0, 4, 1, 5, 2, 6, 3, 7 216551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com}; 217551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 218caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com// vpshufb for vphaddw + vpackuswb packed to shorts. 219851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec8 kShufARGBToUV_AVX = { 220caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 221caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 222caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com}; 223caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com 224c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Constants for BGRA. 225851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kBGRAToY = { 2269394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 2279394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2289394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 229851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kBGRAToU = { 2309394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 2319394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2329394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 233851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kBGRAToV = { 2349394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 2359394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 
2369394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 237c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Constants for ABGR. 238851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kABGRToY = { 2399394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 2409394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2419394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 242851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kABGRToU = { 2439394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 2449394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2459394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 246851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kABGRToV = { 2479394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 2489394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2499394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 25025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com// Constants for RGBA. 
251851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kRGBAToY = { 25225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33 25325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com}; 25425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 255851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kRGBAToU = { 25625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38 25725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com}; 25825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 259851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kRGBAToV = { 26025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112 26125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com}; 26225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 263851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kAddY16 = { 264228bdc24e44264baf3402124aaa6d4d81c8896f5fbarchard@google.com 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u 265585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com}; 266585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 267851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec16 kAddYJ64 = { 2684e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com 64, 64, 64, 64, 64, 64, 64, 64 2694e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com}; 270551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 271851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kAddUV128 = { 2729394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 
2739394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u 274585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com}; 275585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 276851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec16 kAddUVJ128 = { 277050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u 278050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com}; 279050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 280ba1f52692605bbf8fedb8a915275c71fa186d291fbarchard@google.com// Shuffle table for converting RGB24 to ARGB. 281851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskRGB24ToARGB = { 2829394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u 2839394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2849394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 2859394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com// Shuffle table for converting RAW to ARGB. 286851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskRAWToARGB = { 2879394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u 2889394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com}; 2899394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 2909eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com// Shuffle table for converting ARGB to RGB24. 
291851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskARGBToRGB24 = { 292f1b6063f50ced6f1b5f9b735011b382a5c1c963ffbarchard@google.com 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u 293f1b6063f50ced6f1b5f9b735011b382a5c1c963ffbarchard@google.com}; 2949eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 2959eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com// Shuffle table for converting ARGB to RAW. 296851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskARGBToRAW = { 297f3fb7b692068862b1091c02b41ac48bfa9258d51fbarchard@google.com 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u 298f1b6063f50ced6f1b5f9b735011b382a5c1c963ffbarchard@google.com}; 2999eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 3004de0c439aae9f2d40246dfebce82c18a159ebdc8fbarchard@google.com// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 301851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskARGBToRGB24_0 = { 302827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u 303827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com}; 304827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 305827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com// Shuffle table for converting ARGB to RAW. 306851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMaskARGBToRAW_0 = { 307827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u 308827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com}; 309827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 31000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com// Duplicates gray value 3 times and fills in alpha opaque. 
// Expands 8 source bytes per loop pass into 8 four-byte pixels: every source
// byte is copied into the low three bytes of its pixel and the top byte is
// forced to 0xff.  The function is naked, so the three arguments are read
// straight from the stack at [esp + 4], [esp + 8] and [esp + 12].
// Stores use movdqa, so dst_argb must be 16 byte aligned.  The loop repeats
// while the remaining count is above zero after subtracting 8, so a whole
// group of 8 pixels is always written.
__declspec(naked) __declspec(align(16))
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
  __asm {
    mov        eax, [esp + 4]        // src_y
    mov        edx, [esp + 8]        // dst_argb
    mov        ecx, [esp + 12]       // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
    pslld      xmm5, 24

    align      4
  convertloop:
    movq       xmm0, qword ptr [eax]  // load 8 source bytes
    lea        eax,  [eax + 8]
    punpcklbw  xmm0, xmm0             // each byte doubled: Y0 Y0 Y1 Y1 ...
    movdqa     xmm1, xmm0
    punpcklwd  xmm0, xmm0             // pixels 0..3: Y Y Y Y per pixel
    punpckhwd  xmm1, xmm1             // pixels 4..7: Y Y Y Y per pixel
    por        xmm0, xmm5             // set top byte of every pixel to 0xff
    por        xmm1, xmm5
    movdqa     [edx], xmm0            // aligned store of pixels 0..3
    movdqa     [edx + 16], xmm1       // aligned store of pixels 4..7
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}

// Same expansion as I400ToARGBRow_SSE2, but the two 16 byte results are
// written with movdqu, so dst_argb has no alignment requirement.
__declspec(naked) __declspec(align(16))
void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
                                  int pix) {
  __asm {
    mov        eax, [esp + 4]        // src_y
    mov        edx, [esp + 8]        // dst_argb
    mov        ecx, [esp + 12]       // pix
    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
    pslld      xmm5, 24

    align      4
  convertloop:
    movq       xmm0, qword ptr [eax]  // load 8 source bytes
    lea        eax,  [eax + 8]
    punpcklbw  xmm0, xmm0             // each byte doubled: Y0 Y0 Y1 Y1 ...
    movdqa     xmm1, xmm0
    punpcklwd  xmm0, xmm0             // pixels 0..3: Y Y Y Y per pixel
    punpckhwd  xmm1, xmm1             // pixels 4..7: Y Y Y Y per pixel
    por        xmm0, xmm5             // set top byte of every pixel to 0xff
    por        xmm1, xmm5
    movdqu     [edx], xmm0            // unaligned store of pixels 0..3
    movdqu     [edx + 16], xmm1       // unaligned store of pixels 4..7
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}

__declspec(naked) __declspec(align(16))
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
  __asm {
    mov       eax, [esp + 4]   // src_rgb24
    mov       edx, [esp + 8]   // dst_argb
    mov       ecx, [esp + 12]  // pix
    pcmpeqb   xmm5, xmm5       // generate mask 0xff000000
    pslld     xmm5, 24
    movdqa    xmm4, kShuffleMaskRGB24ToARGB

    align      4
 convertloop:
    movdqu    xmm0, [eax]
movdqu xmm1, [eax + 16] 382b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm3, [eax + 32] 383b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com lea eax, [eax + 48] 384b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa xmm2, xmm3 385b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} 386b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm2, xmm4 387b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm2, xmm5 388b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} 389b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm0, xmm4 390b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 32], xmm2 391b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm0, xmm5 392b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm1, xmm4 393b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx], xmm0 394b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm1, xmm5 395b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} 396b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm3, xmm4 397b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 16], xmm1 398b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm3, xmm5 39918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 400b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 48], xmm3 401b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com lea edx, [edx + 64] 40218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 403b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com ret 404b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com } 
405b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com} 406b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com 407d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 408b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.comvoid RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, 409b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com int pix) { 410f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 411b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com mov eax, [esp + 4] // src_raw 412b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com mov edx, [esp + 8] // dst_argb 413b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com mov ecx, [esp + 12] // pix 414b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0xff000000 415b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pslld xmm5, 24 4166334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm4, kShuffleMaskRAWToARGB 417b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com 418c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 419eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 420b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm0, [eax] 421b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm1, [eax + 16] 422b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm3, [eax + 32] 423b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com lea eax, [eax + 48] 424b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa xmm2, xmm3 425b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} 426b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm2, xmm4 427b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm2, xmm5 
428b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} 429b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm0, xmm4 430b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 32], xmm2 431b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm0, xmm5 432b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm1, xmm4 433b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx], xmm0 434b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm1, xmm5 435b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} 436b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pshufb xmm3, xmm4 437b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 16], xmm1 438b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com por xmm3, xmm5 43918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 440b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com movdqa [edx + 48], xmm3 441b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com lea edx, [edx + 64] 44218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 443b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com ret 444b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com } 445b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com} 446b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com 447c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// pmul method to replicate bits. 
448c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Math to replicate bits: 449ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com// (v << 8) | (v << 3) 450ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com// v * 256 + v * 8 451ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com// v * (256 + 8) 452ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 453c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 20 instructions. 454d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 455ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.comvoid RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, 456ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com int pix) { 457f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 458ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov eax, 0x01080108 // generate multiplier to repeat 5 bits 459ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movd xmm5, eax 460ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pshufd xmm5, xmm5, 0 4616d6b7709f754391252dc716b92801cc8ec425642fbarchard@google.com mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits 462ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movd xmm6, eax 463ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pshufd xmm6, xmm6, 0 464ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red 465ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm3, 11 466ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green 467ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm4, 10 468ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psrlw xmm4, 5 
469ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha 470ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm7, 8 471ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com 472ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov eax, [esp + 4] // src_rgb565 473ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov edx, [esp + 8] // dst_argb 474ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov ecx, [esp + 12] // pix 475ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 476ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 477ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com 478c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 479ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com convertloop: 480b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm0, [eax] // fetch 8 pixels of bgr565 481ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm1, xmm0 482ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm2, xmm0 483ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pand xmm1, xmm3 // R in upper 5 bits 484ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm2, 11 // B in upper 5 bits 485ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm1, xmm5 // * (256 + 8) 486ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm2, xmm5 // * (256 + 8) 487ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm1, 8 488ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com por xmm1, xmm2 // RB 489ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pand xmm0, xmm4 // G in middle 6 bits 490ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm0, xmm6 // << 5 * (256 + 4) 
491ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com por xmm0, xmm7 // AG 492ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm2, xmm1 493ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com punpcklbw xmm1, xmm0 494ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com punpckhbw xmm2, xmm0 495ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB 496ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB 497ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com lea eax, [eax + 16] 498ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub ecx, 8 49918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 500ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com ret 501ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com } 502ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com} 503ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com 504ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com// 24 instructions 505d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 506ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.comvoid ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, 507ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com int pix) { 508f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 509ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov eax, 0x01080108 // generate multiplier to repeat 5 bits 510ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movd xmm5, eax 511ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pshufd xmm5, xmm5, 0 512ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits 
513ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movd xmm6, eax 514ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pshufd xmm6, xmm6, 0 515ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red 516ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm3, 11 5170e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green 518ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psrlw xmm4, 6 519ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha 520ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm7, 8 521ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com 522ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov eax, [esp + 4] // src_argb1555 523ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov edx, [esp + 8] // dst_argb 524ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com mov ecx, [esp + 12] // pix 525ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 526ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 527ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com 528c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 529ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com convertloop: 530b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm0, [eax] // fetch 8 pixels of 1555 531ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm1, xmm0 532ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm2, xmm0 533ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm1, 1 // R in upper 5 bits 534ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm2, 11 // B in upper 5 bits 
535ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pand xmm1, xmm3 536ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm2, xmm5 // * (256 + 8) 537ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm1, xmm5 // * (256 + 8) 538ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psllw xmm1, 8 539ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com por xmm1, xmm2 // RB 540ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm2, xmm0 541ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pand xmm0, xmm4 // G in middle 5 bits 542ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com psraw xmm2, 8 // A 543ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pmulhuw xmm0, xmm6 // << 6 * (256 + 8) 544ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com pand xmm2, xmm7 545ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com por xmm0, xmm2 // AG 546ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa xmm2, xmm1 547ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com punpcklbw xmm1, xmm0 548ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com punpckhbw xmm2, xmm0 549ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB 550ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB 551ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com lea eax, [eax + 16] 552ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub ecx, 8 55318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 554ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com ret 555ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com } 556ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com} 5576aa761da6d5de07a602425bbe070f5dc067c3d68fbarchard@google.com 
558c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 18 instructions. 559d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 56017272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.comvoid ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, 56117272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com int pix) { 562f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 56317272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f 56417272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com movd xmm4, eax 56517272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com pshufd xmm4, xmm4, 0 56617272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles 56717272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com pslld xmm5, 4 56817272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com mov eax, [esp + 4] // src_argb4444 56917272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com mov edx, [esp + 8] // dst_argb 57017272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com mov ecx, [esp + 12] // pix 571ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 572ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com sub edx, eax 57317272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com 574c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 57517272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com convertloop: 576b1dd02d66cbda3e0c571bf81c247f850cdb3e2fdfbarchard@google.com movdqu xmm0, [eax] // fetch 8 pixels of bgra4444 57717272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com movdqa xmm2, xmm0 57817272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com pand xmm0, xmm4 // mask low nibbles 57917272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com pand xmm2, xmm5 // mask high nibbles 
58017272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com movdqa xmm1, xmm0 58117272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com movdqa xmm3, xmm2 58217272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com psllw xmm1, 4 58317272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com psrlw xmm3, 4 58417272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com por xmm0, xmm1 58517272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com por xmm2, xmm3 586ba1f52692605bbf8fedb8a915275c71fa186d291fbarchard@google.com movdqa xmm1, xmm0 58717272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com punpcklbw xmm0, xmm2 588ba1f52692605bbf8fedb8a915275c71fa186d291fbarchard@google.com punpckhbw xmm1, xmm2 589ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx], xmm0 // store 4 pixels of ARGB 590ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com movdqa [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB 591ccd6d9b2de6af7985775a2e5537190cf5794dd44fbarchard@google.com lea eax, [eax + 16] 592ba1f52692605bbf8fedb8a915275c71fa186d291fbarchard@google.com sub ecx, 8 59318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 59417272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com ret 59517272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com } 59617272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com} 59717272be539c30cebca4cd11e2945ae94cd876a20fbarchard@google.com 598d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 5999eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.comvoid ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { 600f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 6019eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov eax, [esp + 4] // src_argb 6029eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov edx, [esp + 8] // dst_rgb 
6039eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov ecx, [esp + 12] // pix 604510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm6, kShuffleMaskARGBToRGB24 6059eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 606c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6079eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com convertloop: 6087e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm0, [eax] // fetch 16 pixels of argb 6097e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm1, [eax + 16] 6107e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm2, [eax + 32] 6117e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm3, [eax + 48] 6129eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea eax, [eax + 64] 613510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB 614510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm1, xmm6 615510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm2, xmm6 616510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm3, xmm6 617510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm4, xmm1 // 4 bytes from 1 for 0 618510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrldq xmm1, 4 // 8 bytes from 1 619510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm4, 12 // 4 bytes from 1 for 0 620510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm5, xmm2 // 8 bytes from 2 for 1 621510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm0, xmm4 // 4 bytes from 1 for 0 622510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm5, 8 // 8 bytes from 2 for 1 6237e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx], xmm0 // store 0 
624510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm1, xmm5 // 8 bytes from 2 for 1 625510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrldq xmm2, 8 // 4 bytes from 2 626510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm3, 4 // 12 bytes from 3 for 2 627510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm2, xmm3 // 12 bytes from 3 for 2 6287e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx + 16], xmm1 // store 1 6297e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx + 32], xmm2 // store 2 6309eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea edx, [edx + 48] 6319eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com sub ecx, 16 63218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 6339eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com ret 6349eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com } 6359eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com} 6369eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 637d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 6389eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.comvoid ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { 639f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 6409eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov eax, [esp + 4] // src_argb 6419eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov edx, [esp + 8] // dst_rgb 6429eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov ecx, [esp + 12] // pix 643510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm6, kShuffleMaskARGBToRAW 6449eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 645c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6469eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 
convertloop: 6477e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm0, [eax] // fetch 16 pixels of argb 6487e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm1, [eax + 16] 6497e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm2, [eax + 32] 6507e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu xmm3, [eax + 48] 6519eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea eax, [eax + 64] 652510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB 653510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm1, xmm6 654510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm2, xmm6 655510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pshufb xmm3, xmm6 656510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm4, xmm1 // 4 bytes from 1 for 0 657510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrldq xmm1, 4 // 8 bytes from 1 658510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm4, 12 // 4 bytes from 1 for 0 659510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm5, xmm2 // 8 bytes from 2 for 1 660510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm0, xmm4 // 4 bytes from 1 for 0 661510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm5, 8 // 8 bytes from 2 for 1 6627e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx], xmm0 // store 0 663510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm1, xmm5 // 8 bytes from 2 for 1 664510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrldq xmm2, 8 // 4 bytes from 2 665510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslldq xmm3, 4 // 12 bytes from 3 for 2 666510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm2, xmm3 // 12 bytes from 3 for 2 
6677e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx + 16], xmm1 // store 1 6687e7c7753ba712c0d1365276c27ee13866b948415fbarchard@google.com movdqu [edx + 32], xmm2 // store 2 6699eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea edx, [edx + 48] 6709eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com sub ecx, 16 67118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 6729eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com ret 6739eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com } 6749eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com} 6759eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 676d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 6779eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.comvoid ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { 678f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 6799eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov eax, [esp + 4] // src_argb 6809eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov edx, [esp + 8] // dst_rgb 6819eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov ecx, [esp + 12] // pix 682510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pcmpeqb xmm3, xmm3 // generate mask 0x0000001f 683510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm3, 27 684510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 685510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm4, 26 686510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm4, 5 687510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 688510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm5, 11 
6899eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 690c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6919eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com convertloop: 6929eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm0, [eax] // fetch 4 pixels of argb 6939eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm1, xmm0 // B 6949eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm2, xmm0 // G 695510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm0, 8 // R 696510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm1, 3 // B 697510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm2, 5 // G 698510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrad xmm0, 16 // R 699510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm1, xmm3 // B 700510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm2, xmm4 // G 701510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm0, xmm5 // R 702510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm1, xmm2 // BG 703510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm0, xmm1 // BGR 7049eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com packssdw xmm0, xmm0 70524d2656b65beb2a86acf1913a5c025a6aca21299fbarchard@google.com lea eax, [eax + 16] 70615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 7079eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea edx, [edx + 8] 7089eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com sub ecx, 4 70918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 7109eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com ret 7119eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com } 7129eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com} 
7139eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 714c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// TODO(fbarchard): Improve sign extension/packing. 715d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 7169eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.comvoid ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { 717f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 7189eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov eax, [esp + 4] // src_argb 7199eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov edx, [esp + 8] // dst_rgb 7209eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com mov ecx, [esp + 12] // pix 721510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0x0000001f 722510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm4, 27 723510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm5, xmm4 // generate mask 0x000003e0 724510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm5, 5 725510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm6, xmm4 // generate mask 0x00007c00 726510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm6, 10 727510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pcmpeqb xmm7, xmm7 // generate mask 0xffff8000 728510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pslld xmm7, 15 7299eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 730c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 7319eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com convertloop: 7329eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm0, [eax] // fetch 4 pixels of argb 7339eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm1, xmm0 // B 
7349eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm2, xmm0 // G 735510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com movdqa xmm3, xmm0 // R 736510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrad xmm0, 16 // A 737510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm1, 3 // B 738510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm2, 6 // G 739510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com psrld xmm3, 9 // R 740510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm0, xmm7 // A 741510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm1, xmm4 // B 742510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm2, xmm5 // G 743510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com pand xmm3, xmm6 // R 744510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm0, xmm1 // BA 745510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm2, xmm3 // GR 746510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com por xmm0, xmm2 // BGRA 74724d2656b65beb2a86acf1913a5c025a6aca21299fbarchard@google.com packssdw xmm0, xmm0 74824d2656b65beb2a86acf1913a5c025a6aca21299fbarchard@google.com lea eax, [eax + 16] 74924d2656b65beb2a86acf1913a5c025a6aca21299fbarchard@google.com movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555 7509eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea edx, [edx + 8] 7519eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com sub ecx, 4 75218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 7539eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com ret 7549eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com } 7559eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com} 7569eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 757d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 
7589eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.comvoid ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { 759f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 760510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com mov eax, [esp + 4] // src_argb 761510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com mov edx, [esp + 8] // dst_rgb 762510fe70cb5c59f51cb48d854aef5393f6c85307dfbarchard@google.com mov ecx, [esp + 12] // pix 7639eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 7649eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com psllw xmm4, 12 7659eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm3, xmm4 // generate mask 0x00f000f0 7669eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com psrlw xmm3, 8 7679eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 768c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 7699eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com convertloop: 7709eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm0, [eax] // fetch 4 pixels of argb 7719eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movdqa xmm1, xmm0 7729eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com pand xmm0, xmm3 // low nibble 7739eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com pand xmm1, xmm4 // high nibble 7749eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com psrl xmm0, 4 7759eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com psrl xmm1, 8 7769eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com por xmm0, xmm1 7779eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com packuswb xmm0, xmm0 77824d2656b65beb2a86acf1913a5c025a6aca21299fbarchard@google.com lea eax, [eax + 16] 7799eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444 
7809eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com lea edx, [edx + 8] 7819eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com sub ecx, 4 78218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 7839eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com ret 7849eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com } 7859eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com} 7869eefb2e8dd2c40a8b6bd0f02d794fe78332fc08ffbarchard@google.com 787c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Convert 16 ARGB pixels (64 bytes) to 16 Y values. 788d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 789585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.comvoid ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 790f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 7919394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 7929394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 7939394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov ecx, [esp + 12] /* pix */ 7946334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddY16 7956334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm4, kARGBToY 796585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 797c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 798eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 7999394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 8009394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, [eax + 16] 8019394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 8029394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 
803b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm0, xmm4 804b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm1, xmm4 805b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm2, xmm4 806b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm3, xmm4 8079394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 8089394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm1 8099394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm2, xmm3 8109394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm0, 7 8119394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm2, 7 8129394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packuswb xmm0, xmm2 813b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com paddb xmm0, xmm5 814aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 8159394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa [edx], xmm0 8169394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 16] 81718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 8189394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com ret 8199394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com } 8209394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com} 8219394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 822cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com// Convert 16 ARGB pixels (64 bytes) to 16 Y values. 
823cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com__declspec(naked) __declspec(align(16)) 824cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 825cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com __asm { 826cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov eax, [esp + 4] /* src_argb */ 827cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov edx, [esp + 8] /* dst_y */ 828cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov ecx, [esp + 12] /* pix */ 8294e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com movdqa xmm4, kARGBToYJ 8304e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com movdqa xmm5, kAddYJ64 831cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 832c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 833cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com convertloop: 834cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqa xmm0, [eax] 835cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqa xmm1, [eax + 16] 836cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqa xmm2, [eax + 32] 837cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqa xmm3, [eax + 48] 838cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm0, xmm4 839cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm1, xmm4 840cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm2, xmm4 841cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm3, xmm4 842cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com lea eax, [eax + 64] 843cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com phaddw xmm0, xmm1 844cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com phaddw xmm2, xmm3 8458c9de166a11222d6aa38deb12449b30451d2eca5fbarchard@google.com paddw xmm0, xmm5 
// Add .5 for rounding. 8464e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com paddw xmm2, xmm5 847cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com psrlw xmm0, 7 848cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com psrlw xmm2, 7 849cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com packuswb xmm0, xmm2 850cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com sub ecx, 16 851cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqa [edx], xmm0 852cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com lea edx, [edx + 16] 853cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com jg convertloop 854cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com ret 855cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com } 856cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com} 857cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 858551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com#ifdef HAS_ARGBTOYROW_AVX2 859551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com// Convert 32 ARGB pixels (128 bytes) to 32 Y values. 
860551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com__declspec(naked) __declspec(align(32)) 861551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.comvoid ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { 862551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com __asm { 863551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 864551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 865551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov ecx, [esp + 12] /* pix */ 866446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm4, kARGBToY 867446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm5, kAddY16 868446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vmovdqa ymm6, kPermdARGBToY_AVX 869551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 870c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 871551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com convertloop: 872b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 873b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 874b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm2, [eax + 64] 875b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm3, [eax + 96] 876551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpmaddubsw ymm0, ymm0, ymm4 877551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpmaddubsw ymm1, ymm1, ymm4 878551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpmaddubsw ymm2, ymm2, ymm4 879551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpmaddubsw ymm3, ymm3, ymm4 880551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com lea eax, [eax + 128] 881caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vphaddw ymm0, ymm0, ymm1 // mutates. 
882551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vphaddw ymm2, ymm2, ymm3 883551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpsrlw ymm0, ymm0, 7 884551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpsrlw ymm2, ymm2, 7 885caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpackuswb ymm0, ymm0, ymm2 // mutates. 886551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. 887551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com vpaddb ymm0, ymm0, ymm5 888551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com sub ecx, 32 889b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edx], ymm0 890551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com lea edx, [edx + 32] 891551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com jg convertloop 8929b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 893551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com ret 894551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com } 895551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com} 896551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com#endif // HAS_ARGBTOYROW_AVX2 897551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 89891c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com#ifdef HAS_ARGBTOYROW_AVX2 89991c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com// Convert 32 ARGB pixels (128 bytes) to 32 Y values. 
90091c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com__declspec(naked) __declspec(align(32)) 90191c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.comvoid ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { 90291c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com __asm { 90391c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com mov eax, [esp + 4] /* src_argb */ 90491c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com mov edx, [esp + 8] /* dst_y */ 90591c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com mov ecx, [esp + 12] /* pix */ 906446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm4, kARGBToYJ 907446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm5, kAddYJ64 908446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vmovdqa ymm6, kPermdARGBToY_AVX 90991c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com 910c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 91191c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com convertloop: 91291c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vmovdqu ymm0, [eax] 91391c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vmovdqu ymm1, [eax + 32] 91491c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vmovdqu ymm2, [eax + 64] 91591c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vmovdqu ymm3, [eax + 96] 91691c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpmaddubsw ymm0, ymm0, ymm4 91791c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpmaddubsw ymm1, ymm1, ymm4 91891c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpmaddubsw ymm2, ymm2, ymm4 91991c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpmaddubsw ymm3, ymm3, ymm4 92091c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com lea eax, [eax + 128] 92191c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vphaddw ymm0, ymm0, ymm1 // mutates. 
92291c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vphaddw ymm2, ymm2, ymm3 92391c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding. 92491c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpaddw ymm2, ymm2, ymm5 92591c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpsrlw ymm0, ymm0, 7 92691c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpsrlw ymm2, ymm2, 7 92791c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpackuswb ymm0, ymm0, ymm2 // mutates. 92891c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. 92991c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com sub ecx, 32 93091c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vmovdqu [edx], ymm0 93191c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com lea edx, [edx + 32] 93291c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com jg convertloop 93391c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com 93491c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com vzeroupper 93591c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com ret 93691c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com } 93791c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com} 93891c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com#endif // HAS_ARGBTOYJROW_AVX2 93991c50c3a7d8736aa5834d6c54ae1c6bbea581e1ffbarchard@google.com 940d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 941b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 942f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 943b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 944b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp 
+ 8] /* dst_y */ 945b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 12] /* pix */ 946b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddY16 947b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, kARGBToY 948b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 949c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 950b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 951b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 952b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 953b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 954b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 955b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm4 956b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm4 957b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm4 958b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm4 959b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 960b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm1 961b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm2, xmm3 962b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm0, 7 963b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm2, 7 964b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packuswb xmm0, xmm2 965b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 966aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 967b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu [edx], xmm0 968b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 16] 
96918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 970b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 971b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 972b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 973b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 974d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 975cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 976cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com __asm { 977cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov eax, [esp + 4] /* src_argb */ 978cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov edx, [esp + 8] /* dst_y */ 979cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com mov ecx, [esp + 12] /* pix */ 9804e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com movdqa xmm4, kARGBToYJ 9814e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com movdqa xmm5, kAddYJ64 982cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 983c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 984cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com convertloop: 985cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqu xmm0, [eax] 986cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqu xmm1, [eax + 16] 987cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqu xmm2, [eax + 32] 988cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqu xmm3, [eax + 48] 989cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm0, xmm4 990cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm1, xmm4 991cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com pmaddubsw xmm2, xmm4 992cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 
pmaddubsw xmm3, xmm4 993cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com lea eax, [eax + 64] 994cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com phaddw xmm0, xmm1 995cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com phaddw xmm2, xmm3 9964e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com paddw xmm0, xmm5 9974e0d7cc2c60e8aa85954c48927c6be08ee2b9db4fbarchard@google.com paddw xmm2, xmm5 998cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com psrlw xmm0, 7 999cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com psrlw xmm2, 7 1000cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com packuswb xmm0, xmm2 1001cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com sub ecx, 16 1002cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com movdqu [edx], xmm0 1003cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com lea edx, [edx + 16] 1004cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com jg convertloop 1005cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com ret 1006cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com } 1007cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com} 1008cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 1009cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com__declspec(naked) __declspec(align(16)) 10109394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.comvoid BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 1011f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 10129394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 10139394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 10149394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov ecx, [esp + 12] /* pix */ 10156334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddY16 
10166334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm4, kBGRAToY 10179394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 1018c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1019eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 10209394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 10219394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, [eax + 16] 10229394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 10239394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 1024b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm0, xmm4 1025b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm1, xmm4 1026b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm2, xmm4 1027b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm3, xmm4 10289394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 10299394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm1 10309394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm2, xmm3 10319394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm0, 7 10329394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm2, 7 10339394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packuswb xmm0, xmm2 1034b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com paddb xmm0, xmm5 1035aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 10369394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa [edx], xmm0 10379394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 16] 103818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 10399394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com ret 
10409394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com } 10419394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com} 10429394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 1043d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1044b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 1045f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1046b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 1047b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 1048b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 12] /* pix */ 1049b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddY16 1050b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, kBGRAToY 1051b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1052c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1053b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 1054b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 1055b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 1056b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 1057b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 1058b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm4 1059b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm4 1060b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm4 1061b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm4 1062b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 
1063b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm1 1064b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm2, xmm3 1065b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm0, 7 1066b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm2, 7 1067b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packuswb xmm0, xmm2 1068b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 1069aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 1070b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu [edx], xmm0 1071b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 16] 107218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 1073b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 1074b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 1075b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 1076b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1077d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 10789394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.comvoid ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 1079f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 10809394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 10819394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 10829394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov ecx, [esp + 12] /* pix */ 10836334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddY16 10846334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm4, kABGRToY 10859394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 1086c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 
1087eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 10889394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 10899394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, [eax + 16] 10909394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 10919394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 1092b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm0, xmm4 1093b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm1, xmm4 1094b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm2, xmm4 1095b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com pmaddubsw xmm3, xmm4 10969394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 10979394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm1 10989394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm2, xmm3 10999394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm0, 7 11009394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psrlw xmm2, 7 11019394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packuswb xmm0, xmm2 1102b61497636a648c771ac55d184a80b17aca7414f5fbarchard@google.com paddb xmm0, xmm5 1103aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 11049394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa [edx], xmm0 11059394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 16] 110618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 1107585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com ret 1108585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com } 1109585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com} 1110585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 
1111d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1112b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 1113f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1114b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 1115b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 1116b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 12] /* pix */ 1117b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddY16 1118b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, kABGRToY 1119b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1120c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1121b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 1122b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 1123b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 1124b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 1125b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 1126b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm4 1127b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm4 1128b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm4 1129b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm4 1130b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 1131b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm1 1132b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm2, xmm3 
1133b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm0, 7 1134b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psrlw xmm2, 7 1135b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packuswb xmm0, xmm2 1136b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 113718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 1138b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu [edx], xmm0 1139b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 16] 114018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 1141b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 1142b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 1143b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 1144b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1145d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 114625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { 1147f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 114825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 114925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 115025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 12] /* pix */ 115125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm5, kAddY16 115225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, kRGBAToY 115325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 1154c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 115525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 115625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm0, [eax] 
115725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm1, [eax + 16] 115825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm2, [eax + 32] 115925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm3, [eax + 48] 116025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm0, xmm4 116125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm1, xmm4 116225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm2, xmm4 116325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm3, xmm4 116425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea eax, [eax + 64] 116525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm0, xmm1 116625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm2, xmm3 116725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psrlw xmm0, 7 116825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psrlw xmm2, 7 116925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com packuswb xmm0, xmm2 117025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com paddb xmm0, xmm5 1171aa4750f86da4747c8a3d1488cd25c49c434fbe65fbarchard@google.com sub ecx, 16 117225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa [edx], xmm0 117325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 16] 117425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 117525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 117625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 117725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 117825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 117925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 118025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid RGBAToYRow_Unaligned_SSSE3(const uint8* 
src_argb, uint8* dst_y, int pix) { 1181f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 118225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 118325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8] /* dst_y */ 118425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 12] /* pix */ 118525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm5, kAddY16 118625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, kRGBAToY 118725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 1188c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 118925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 119025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm0, [eax] 119125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm1, [eax + 16] 119225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm2, [eax + 32] 119325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm3, [eax + 48] 119425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm0, xmm4 119525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm1, xmm4 119625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm2, xmm4 119725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm3, xmm4 119825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea eax, [eax + 64] 119925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm0, xmm1 120025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm2, xmm3 120125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psrlw xmm0, 7 120225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psrlw xmm2, 7 120325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com packuswb xmm0, xmm2 
120425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com paddb xmm0, xmm5 120525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub ecx, 16 120625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx], xmm0 120725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 16] 120825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 120925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 121025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 121125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 121225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 121325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 1214585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.comvoid ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, 1215585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1216f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1217585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com push esi 1218585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com push edi 1219585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1220585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1221585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1222585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1223585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 12246334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm7, kARGBToU 12256334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm6, kARGBToV 12266334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddUV128 
12279394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com sub edi, edx // stride from u to v 1228585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 1229c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1230eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 12319394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 12329394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 1233585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com movdqa xmm1, [eax + 16] 12349394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 12359394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 12369394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, [eax + esi] 12379394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm1, [eax + esi + 16] 12389394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, [eax + esi + 32] 12399394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm3, [eax + esi + 48] 12409394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 12419394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm0 1242585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com shufps xmm0, xmm1, 0x88 12439394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm1, 0xdd 12449394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, xmm4 12459394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm2 12469394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm2, xmm3, 0x88 12479394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm3, 0xdd 12489394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, xmm4 1249585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 
1250585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com // step 2 - convert to U and V 1251585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com // from here down is very similar to Y code except 12529394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1253585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com movdqa xmm1, xmm0 12549394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, xmm2 12559394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm0, xmm7 // U 12569394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm2, xmm7 12579394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm1, xmm6 // V 12589394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm3, xmm6 12599394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm2 12609394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm1, xmm3 12619394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm0, 8 12629394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm1, 8 12639394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packsswb xmm0, xmm1 12649394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 12659394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 12669394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // step 3 - store 8 U and 8 V values 126718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 12689394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movlps qword ptr [edx], xmm0 // U 12699394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 12709394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 8] 127118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 
127218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 1273585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com pop edi 1274585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com pop esi 1275585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com ret 1276585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com } 1277585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com} 1278585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 1279050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com__declspec(naked) __declspec(align(16)) 1280050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, 1281050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1282050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com __asm { 1283050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com push esi 1284050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com push edi 1285050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1286050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1287050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1288050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1289050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 1290050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm7, kARGBToUJ 1291050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm6, kARGBToVJ 1292050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm5, kAddUVJ128 1293050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com sub edi, edx // stride from u to v 1294050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 
1295c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1296050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com convertloop: 1297050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1298050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm0, [eax] 1299050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm1, [eax + 16] 1300050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm2, [eax + 32] 1301050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm3, [eax + 48] 1302050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm0, [eax + esi] 1303050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm1, [eax + esi + 16] 1304050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm2, [eax + esi + 32] 1305050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm3, [eax + esi + 48] 1306050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com lea eax, [eax + 64] 1307050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm4, xmm0 1308050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm0, xmm1, 0x88 1309050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm4, xmm1, 0xdd 1310050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm0, xmm4 1311050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm4, xmm2 1312050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm2, xmm3, 0x88 1313050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm4, xmm3, 0xdd 1314050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm2, xmm4 1315050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1316050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // step 2 - convert to U and V 1317050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // from here 
down is very similar to Y code except 1318050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1319050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm1, xmm0 1320050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm3, xmm2 1321050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1322050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm2, xmm7 1323050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1324050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm3, xmm6 1325050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com phaddw xmm0, xmm2 1326050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com phaddw xmm1, xmm3 1327050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com paddw xmm0, xmm5 // +.5 rounding -> unsigned 1328050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com paddw xmm1, xmm5 1329050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com psraw xmm0, 8 1330050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com psraw xmm1, 8 1331050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com packsswb xmm0, xmm1 1332050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1333050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // step 3 - store 8 U and 8 V values 1334050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com sub ecx, 16 1335050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1336050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1337050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com lea edx, [edx + 8] 1338050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com jg convertloop 1339050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 
1340050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pop edi 1341050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pop esi 1342050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com ret 1343050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com } 1344050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com} 1345050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1346551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com#ifdef HAS_ARGBTOUVROW_AVX2 1347551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com__declspec(naked) __declspec(align(32)) 1348551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.comvoid ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, 1349551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1350551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com __asm { 1351551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com push esi 1352551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com push edi 1353551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1354551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1355551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1356551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1357551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 1358446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm5, kAddUV128 1359446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm6, kARGBToV 1360446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm7, kARGBToU 1361551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com sub edi, edx // stride from u to v 
1362551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 1363c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1364551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com convertloop: 1365b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com /* step 1 - subsample 32x2 argb pixels to 16x1 */ 1366caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vmovdqu ymm0, [eax] 1367caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vmovdqu ymm1, [eax + 32] 1368caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vmovdqu ymm2, [eax + 64] 1369caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vmovdqu ymm3, [eax + 96] 1370caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm0, ymm0, [eax + esi] 1371caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm1, ymm1, [eax + esi + 32] 1372caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm2, ymm2, [eax + esi + 64] 1373caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm3, ymm3, [eax + esi + 96] 1374caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com lea eax, [eax + 128] 1375caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vshufps ymm4, ymm0, ymm1, 0x88 1376caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vshufps ymm0, ymm0, ymm1, 0xdd 1377caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm0, ymm0, ymm4 // mutated by vshufps 1378caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vshufps ymm4, ymm2, ymm3, 0x88 1379caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vshufps ymm2, ymm2, ymm3, 0xdd 1380caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpavgb ymm2, ymm2, ymm4 // mutated by vshufps 1381551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 1382551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com // step 2 - convert to U and V 
1383551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com // from here down is very similar to Y code except 1384551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com // instead of 32 different pixels, its 16 pixels of U and 16 of V 1385caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpmaddubsw ymm1, ymm0, ymm7 // U 1386caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpmaddubsw ymm3, ymm2, ymm7 1387caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpmaddubsw ymm0, ymm0, ymm6 // V 1388caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpmaddubsw ymm2, ymm2, ymm6 1389caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vphaddw ymm1, ymm1, ymm3 // mutates 1390caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vphaddw ymm0, ymm0, ymm2 1391caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpsraw ymm1, ymm1, 8 1392caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpsraw ymm0, ymm0, 8 1393caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpacksswb ymm0, ymm1, ymm0 // mutates 1394caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpermq ymm0, ymm0, 0xd8 // For vpacksswb 1395caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw 1396caf6e2470b15fd4a8df03351b07683352226824cfbarchard@google.com vpaddb ymm0, ymm0, ymm5 // -> unsigned 1397551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 1398551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com // step 3 - store 16 U and 16 V values 1399551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com sub ecx, 32 1400b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx], ymm0, 0 // U 1401b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx + edi], ymm0, 1 // V 1402551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com lea edx, [edx + 16] 
1403551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com jg convertloop 1404551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 1405551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com pop edi 1406551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com pop esi 14079b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 1408551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com ret 1409551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com } 1410551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com} 1411551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com#endif // HAS_ARGBTOUVROW_AVX2 1412551d2b297e6a071fe58a8f2da2cb69cc0ec56ed8fbarchard@google.com 1413d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1414b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 1415b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1416f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1417b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push esi 1418b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push edi 1419b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1420b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1421b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1422b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1423b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 1424b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm7, kARGBToU 1425b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm6, kARGBToV 
1426b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddUV128 1427b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com sub edi, edx // stride from u to v 1428b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1429c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1430b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 1431b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1432b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 1433b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 1434b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 1435b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 1436b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi] 1437b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 1438b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 16] 1439b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm1, xmm4 1440b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 32] 1441b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 1442b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 48] 1443b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm3, xmm4 1444b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 1445b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm0 1446b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm0, xmm1, 0x88 1447b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm1, 0xdd 1448b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 
1449b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm2 1450b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm2, xmm3, 0x88 1451b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm3, 0xdd 1452b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 1453b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1454b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 2 - convert to U and V 1455b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // from here down is very similar to Y code except 1456b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1457b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm1, xmm0 1458b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm3, xmm2 1459b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1460b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm7 1461b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1462b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm6 1463b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm2 1464b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm1, xmm3 1465b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm0, 8 1466b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm1, 8 1467b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packsswb xmm0, xmm1 1468b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 1469b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1470b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 3 - store 8 U and 8 V values 
147118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 1472b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1473b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1474b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 8] 147518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 147618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 1477b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop edi 1478b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop esi 1479b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 1480b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 1481b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 1482b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1483d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1484050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 1485050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1486050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com __asm { 1487050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com push esi 1488050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com push edi 1489050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1490050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1491050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1492050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1493050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 
1494050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm7, kARGBToUJ 1495050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm6, kARGBToVJ 1496050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm5, kAddUVJ128 1497050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com sub edi, edx // stride from u to v 1498050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1499c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1500050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com convertloop: 1501050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1502050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm0, [eax] 1503050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm1, [eax + 16] 1504050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm2, [eax + 32] 1505050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm3, [eax + 48] 1506050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm4, [eax + esi] 1507050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm0, xmm4 1508050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm4, [eax + esi + 16] 1509050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm1, xmm4 1510050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm4, [eax + esi + 32] 1511050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm2, xmm4 1512050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqu xmm4, [eax + esi + 48] 1513050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm3, xmm4 1514050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com lea eax, [eax + 64] 1515050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm4, xmm0 1516050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm0, 
xmm1, 0x88 1517050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm4, xmm1, 0xdd 1518050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm0, xmm4 1519050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm4, xmm2 1520050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm2, xmm3, 0x88 1521050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com shufps xmm4, xmm3, 0xdd 1522050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pavgb xmm2, xmm4 1523050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1524050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // step 2 - convert to U and V 1525050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // from here down is very similar to Y code except 1526050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1527050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm1, xmm0 1528050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm3, xmm2 1529050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1530050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm2, xmm7 1531050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1532050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pmaddubsw xmm3, xmm6 1533050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com phaddw xmm0, xmm2 1534050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com phaddw xmm1, xmm3 1535050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com paddw xmm0, xmm5 // +.5 rounding -> unsigned 1536050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com paddw xmm1, xmm5 1537050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com psraw xmm0, 8 1538050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com psraw xmm1, 8 
1539050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com packsswb xmm0, xmm1 1540050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1541050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com // step 3 - store 8 U and 8 V values 1542050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com sub ecx, 16 1543050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1544050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1545050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com lea edx, [edx + 8] 1546050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com jg convertloop 1547050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1548050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pop edi 1549050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com pop esi 1550050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com ret 1551050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com } 1552050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com} 1553050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 1554050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com__declspec(naked) __declspec(align(16)) 155541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.comvoid ARGBToUV444Row_SSSE3(const uint8* src_argb0, 155641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1557f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 155841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com push edi 155941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov eax, [esp + 4 + 4] // src_argb 156041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 156141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 
156241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov ecx, [esp + 4 + 16] // pix 156341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm7, kARGBToU 156441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm6, kARGBToV 156541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm5, kAddUV128 156641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com sub edi, edx // stride from u to v 156741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 1568c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 156941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com convertloop: 157041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com /* convert to U and V */ 157141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm0, [eax] // U 157241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm1, [eax + 16] 157341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm2, [eax + 32] 157441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm3, [eax + 48] 157541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm0, xmm7 157641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm1, xmm7 157741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm2, xmm7 157841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm3, xmm7 157941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm0, xmm1 158041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm2, xmm3 1581d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm0, 8 1582d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm2, 8 1583d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com packsswb xmm0, xmm2 158441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com paddb xmm0, xmm5 
158541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com sub ecx, 16 158641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa [edx], xmm0 158741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 158841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm0, [eax] // V 158941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm1, [eax + 16] 159041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm2, [eax + 32] 159141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm3, [eax + 48] 159241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm0, xmm6 159341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm1, xmm6 159441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm2, xmm6 159541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm3, xmm6 159641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm0, xmm1 159741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm2, xmm3 1598d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm0, 8 1599d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm2, 8 1600d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com packsswb xmm0, xmm2 160141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com paddb xmm0, xmm5 160241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com lea eax, [eax + 64] 160341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa [edx + edi], xmm0 160441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com lea edx, [edx + 16] 160541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com jg convertloop 160641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 160741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pop edi 160841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com ret 
160941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com } 161041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com} 161141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 161241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com__declspec(naked) __declspec(align(16)) 161341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.comvoid ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0, 161441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1615f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 161641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com push edi 161741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov eax, [esp + 4 + 4] // src_argb 161841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 161941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 162041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com mov ecx, [esp + 4 + 16] // pix 162141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm7, kARGBToU 162241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm6, kARGBToV 162341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqa xmm5, kAddUV128 162441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com sub edi, edx // stride from u to v 162541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 1626c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 162741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com convertloop: 162841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com /* convert to U and V */ 162941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm0, [eax] // U 163041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm1, [eax + 16] 163141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm2, [eax 
+ 32] 163241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm3, [eax + 48] 163341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm0, xmm7 163441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm1, xmm7 163541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm2, xmm7 163641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm3, xmm7 163741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm0, xmm1 163841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm2, xmm3 1639d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm0, 8 1640d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm2, 8 1641d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com packsswb xmm0, xmm2 164241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com paddb xmm0, xmm5 164341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com sub ecx, 16 164441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu [edx], xmm0 164541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 164641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm0, [eax] // V 164741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm1, [eax + 16] 164841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm2, [eax + 32] 164941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu xmm3, [eax + 48] 165041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm0, xmm6 165141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm1, xmm6 165241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm2, xmm6 165341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pmaddubsw xmm3, xmm6 165441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm0, xmm1 
165541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com phaddw xmm2, xmm3 1656d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm0, 8 1657d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com psraw xmm2, 8 1658d8b73cacbefdf3cf397a410edba1d58296844adbfbarchard@google.com packsswb xmm0, xmm2 165941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com paddb xmm0, xmm5 166041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com lea eax, [eax + 64] 166141e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com movdqu [edx + edi], xmm0 166241e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com lea edx, [edx + 16] 166341e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com jg convertloop 166441e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 166541e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com pop edi 166641e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com ret 166741e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com } 166841e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com} 166941e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com 167041e972ec31e83b531f4cef30b5be63ffa6aa3cfdfbarchard@google.com__declspec(naked) __declspec(align(16)) 1671bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ARGBToUV422Row_SSSE3(const uint8* src_argb0, 1672bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1673f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1674bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com push edi 1675bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov eax, [esp + 4 + 4] // src_argb 1676bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 1677bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 1678bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov ecx, [esp + 
4 + 16] // pix 1679bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm7, kARGBToU 1680bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm6, kARGBToV 1681bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm5, kAddUV128 1682bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com sub edi, edx // stride from u to v 1683bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1684c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1685bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com convertloop: 1686bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1687bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm0, [eax] 1688bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm1, [eax + 16] 1689bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm2, [eax + 32] 1690bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm3, [eax + 48] 1691bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com lea eax, [eax + 64] 1692bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm4, xmm0 1693bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm0, xmm1, 0x88 1694bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm4, xmm1, 0xdd 1695bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pavgb xmm0, xmm4 1696bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm4, xmm2 1697bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm2, xmm3, 0x88 1698bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm4, xmm3, 0xdd 1699bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pavgb xmm2, xmm4 1700bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1701bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // step 2 - convert to U and V 
1702bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // from here down is very similar to Y code except 1703bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1704bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm1, xmm0 1705bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm3, xmm2 1706bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1707bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm2, xmm7 1708bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1709bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm3, xmm6 1710bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com phaddw xmm0, xmm2 1711bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com phaddw xmm1, xmm3 1712bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com psraw xmm0, 8 1713bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com psraw xmm1, 8 1714bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com packsswb xmm0, xmm1 1715bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 1716bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1717bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // step 3 - store 8 U and 8 V values 1718bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com sub ecx, 16 1719bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1720bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1721bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com lea edx, [edx + 8] 1722bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com jg convertloop 1723bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 
1724bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pop edi 1725bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ret 1726bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com } 1727bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 1728bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1729bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com__declspec(naked) __declspec(align(16)) 1730bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, 1731bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1732f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1733bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com push edi 1734bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov eax, [esp + 4 + 4] // src_argb 1735bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 1736bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 1737bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 1738bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm7, kARGBToU 1739bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm6, kARGBToV 1740bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm5, kAddUV128 1741bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com sub edi, edx // stride from u to v 1742bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1743c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1744bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com convertloop: 1745bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1746bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqu xmm0, [eax] 
1747bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqu xmm1, [eax + 16] 1748bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqu xmm2, [eax + 32] 1749bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqu xmm3, [eax + 48] 1750bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com lea eax, [eax + 64] 1751bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm4, xmm0 1752bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm0, xmm1, 0x88 1753bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm4, xmm1, 0xdd 1754bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pavgb xmm0, xmm4 1755bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm4, xmm2 1756bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm2, xmm3, 0x88 1757bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com shufps xmm4, xmm3, 0xdd 1758bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pavgb xmm2, xmm4 1759bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1760bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // step 2 - convert to U and V 1761bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // from here down is very similar to Y code except 1762bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1763bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm1, xmm0 1764bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movdqa xmm3, xmm2 1765bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1766bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm2, xmm7 1767bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1768bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pmaddubsw xmm3, xmm6 
1769bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com phaddw xmm0, xmm2 1770bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com phaddw xmm1, xmm3 1771bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com psraw xmm0, 8 1772bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com psraw xmm1, 8 1773bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com packsswb xmm0, xmm1 1774bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 1775bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1776bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com // step 3 - store 8 U and 8 V values 1777bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com sub ecx, 16 1778bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1779bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1780bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com lea edx, [edx + 8] 1781bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com jg convertloop 1782bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1783bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com pop edi 1784bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ret 1785bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com } 1786bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 1787bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1788bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com__declspec(naked) __declspec(align(16)) 17899394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.comvoid BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, 17909394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1791f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 
17929394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com push esi 17939394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com push edi 17949394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 17959394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 17969394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 17979394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 17989394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 17996334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm7, kBGRAToU 18006334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm6, kBGRAToV 18016334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddUV128 18029394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com sub edi, edx // stride from u to v 1803585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 1804c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1805eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 18069394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 18079394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 18089394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, [eax + 16] 18099394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 18109394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 18119394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, [eax + esi] 18129394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm1, [eax + esi + 16] 18139394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, [eax + esi + 32] 
18149394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm3, [eax + esi + 48] 18159394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 18169394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm0 18179394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm0, xmm1, 0x88 18189394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm1, 0xdd 18199394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, xmm4 18209394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm2 18219394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm2, xmm3, 0x88 18229394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm3, 0xdd 18239394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, xmm4 18249394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 18259394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // step 2 - convert to U and V 18269394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // from here down is very similar to Y code except 18279394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 18289394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, xmm0 18299394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, xmm2 18309394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm0, xmm7 // U 18319394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm2, xmm7 18329394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm1, xmm6 // V 18339394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm3, xmm6 18349394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm2 18359394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm1, xmm3 
18369394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm0, 8 18379394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm1, 8 18389394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packsswb xmm0, xmm1 18399394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 18409394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 18419394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // step 3 - store 8 U and 8 V values 184218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 18439394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movlps qword ptr [edx], xmm0 // U 18449394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 18459394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 8] 184618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 184718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 18489394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pop edi 18499394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pop esi 18509394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com ret 18519394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com } 1852585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com} 1853585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 1854d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1855b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 1856b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1857f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 1858b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push esi 1859b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push 
edi 1860b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1861b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1862b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1863b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 1864b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 1865b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm7, kBGRAToU 1866b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm6, kBGRAToV 1867b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddUV128 1868b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com sub edi, edx // stride from u to v 1869b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1870c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1871b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 1872b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 1873b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 1874b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 1875b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 1876b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 1877b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi] 1878b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 1879b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 16] 1880b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm1, xmm4 1881b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 32] 
1882b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 1883b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 48] 1884b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm3, xmm4 1885b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 1886b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm0 1887b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm0, xmm1, 0x88 1888b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm1, 0xdd 1889b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 1890b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm2 1891b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm2, xmm3, 0x88 1892b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm3, 0xdd 1893b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 1894b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1895b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 2 - convert to U and V 1896b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // from here down is very similar to Y code except 1897b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 1898b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm1, xmm0 1899b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm3, xmm2 1900b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm7 // U 1901b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm7 1902b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm6 // V 1903b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm6 
1904b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm2 1905b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm1, xmm3 1906b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm0, 8 1907b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm1, 8 1908b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packsswb xmm0, xmm1 1909b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 1910b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1911b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 3 - store 8 U and 8 V values 191218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 1913b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1914b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1915b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 8] 191618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 191718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 1918b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop edi 1919b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop esi 1920b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 1921b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 1922b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 1923b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1924d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 19259394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.comvoid ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, 19269394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 
1927f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 19289394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com push esi 19299394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com push edi 19309394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 19319394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 19329394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 19339394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 19349394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 19356334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm7, kABGRToU 19366334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm6, kABGRToV 19376334808d9d40071249ba9b51b65aa4e3b6e7f43ffbarchard@google.com movdqa xmm5, kAddUV128 19389394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com sub edi, edx // stride from u to v 19399394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 1940c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 1941eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 19429394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 19439394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm0, [eax] 19449394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, [eax + 16] 19459394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm2, [eax + 32] 19469394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, [eax + 48] 19479394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, [eax + esi] 19489394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm1, [eax + esi + 16] 
19499394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, [eax + esi + 32] 19509394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm3, [eax + esi + 48] 19519394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea eax, [eax + 64] 19529394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm0 19539394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm0, xmm1, 0x88 19549394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm1, 0xdd 19559394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm0, xmm4 19569394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm4, xmm2 19579394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm2, xmm3, 0x88 19589394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com shufps xmm4, xmm3, 0xdd 19599394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pavgb xmm2, xmm4 19609394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 19619394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // step 2 - convert to U and V 19629394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // from here down is very similar to Y code except 19639394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 19649394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm1, xmm0 19659394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movdqa xmm3, xmm2 19669394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm0, xmm7 // U 19679394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm2, xmm7 19689394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm1, xmm6 // V 19699394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pmaddubsw xmm3, xmm6 1970b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm0, xmm2 
1971b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com phaddw xmm1, xmm3 1972b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm0, 8 1973b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com psraw xmm1, 8 1974b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com packsswb xmm0, xmm1 1975b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 1976b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1977b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 3 - store 8 U and 8 V values 197818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 1979b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movlps qword ptr [edx], xmm0 // U 1980b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 1981b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea edx, [edx + 8] 198218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 198318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 1984b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop edi 1985b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pop esi 1986b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com ret 1987b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com } 1988b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com} 1989b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 1990d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 1991b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.comvoid ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 1992b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 1993f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 
1994b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push esi 1995b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com push edi 1996b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 1997b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 1998b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 1999b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 2000b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 2001b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm7, kABGRToU 2002b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm6, kABGRToV 2003b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm5, kAddUV128 2004b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com sub edi, edx // stride from u to v 2005b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 2006c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2007b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com convertloop: 2008b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 2009b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm0, [eax] 2010b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm1, [eax + 16] 2011b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm2, [eax + 32] 2012b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm3, [eax + 48] 2013b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi] 2014b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 2015b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 16] 
2016b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm1, xmm4 2017b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 32] 2018b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 2019b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqu xmm4, [eax + esi + 48] 2020b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm3, xmm4 2021b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com lea eax, [eax + 64] 2022b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm0 2023b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm0, xmm1, 0x88 2024b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm1, 0xdd 2025b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm0, xmm4 2026b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm4, xmm2 2027b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm2, xmm3, 0x88 2028b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com shufps xmm4, xmm3, 0xdd 2029b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pavgb xmm2, xmm4 2030b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com 2031b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // step 2 - convert to U and V 2032b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // from here down is very similar to Y code except 2033b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 2034b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm1, xmm0 2035b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com movdqa xmm3, xmm2 2036b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm0, xmm7 // U 2037b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm2, xmm7 
2038b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm1, xmm6 // V 2039b5b27d131adf623aa98109fe4196cd492c2d8b60fbarchard@google.com pmaddubsw xmm3, xmm6 20409394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm0, xmm2 20419394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com phaddw xmm1, xmm3 20429394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm0, 8 20439394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com psraw xmm1, 8 20449394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com packsswb xmm0, xmm1 20459394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 20469394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com 20479394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com // step 3 - store 8 U and 8 V values 204818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 20499394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movlps qword ptr [edx], xmm0 // U 20509394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 20519394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com lea edx, [edx + 8] 205218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 205318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 20549394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pop edi 20559394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com pop esi 20569394ed99fcc9802a068ba4a44c36aed79ce87157fbarchard@google.com ret 2057585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com } 2058585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com} 205925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 206025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 206125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid RGBAToUVRow_SSSE3(const uint8* src_argb0, int 
src_stride_argb, 206225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 2063f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 206425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push esi 206525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push edi 206625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 206725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 206825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 206925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 207025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 207125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm7, kRGBAToU 207225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm6, kRGBAToV 207325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm5, kAddUV128 207425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub edi, edx // stride from u to v 207525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 2076c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 207725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 207825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 207925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm0, [eax] 208025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm1, [eax + 16] 208125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm2, [eax + 32] 208225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm3, [eax + 48] 208325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm0, [eax + esi] 
208425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm1, [eax + esi + 16] 208525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm2, [eax + esi + 32] 208625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm3, [eax + esi + 48] 208725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea eax, [eax + 64] 208825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, xmm0 208925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm0, xmm1, 0x88 209025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm4, xmm1, 0xdd 209125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm0, xmm4 209225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, xmm2 209325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm2, xmm3, 0x88 209425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm4, xmm3, 0xdd 209525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm2, xmm4 209625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 209725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // step 2 - convert to U and V 209825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // from here down is very similar to Y code except 209925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 210025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm1, xmm0 210125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm3, xmm2 210225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm0, xmm7 // U 210325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm2, xmm7 210425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm1, xmm6 // V 210525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm3, xmm6 
210625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm0, xmm2 210725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm1, xmm3 210825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psraw xmm0, 8 210925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psraw xmm1, 8 211025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com packsswb xmm0, xmm1 211125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 211225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 211325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // step 3 - store 8 U and 8 V values 211425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub ecx, 16 211525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movlps qword ptr [edx], xmm0 // U 211625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 211725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 8] 211825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 211925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 212025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop edi 212125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop esi 212225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 212325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 212425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 212525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 212625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 212725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, 212825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com uint8* dst_u, uint8* dst_v, int width) { 
2129f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 213025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push esi 213125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push edi 213225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 213325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov esi, [esp + 8 + 8] // src_stride_argb 213425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 213525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 213625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 8 + 20] // pix 213725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm7, kRGBAToU 213825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm6, kRGBAToV 213925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm5, kAddUV128 214025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub edi, edx // stride from u to v 214125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 2142c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 214325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 214425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com /* step 1 - subsample 16x2 argb pixels to 8x1 */ 214525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm0, [eax] 214625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm1, [eax + 16] 214725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm2, [eax + 32] 214825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm3, [eax + 48] 214925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm4, [eax + esi] 215025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm0, xmm4 215125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 
movdqu xmm4, [eax + esi + 16] 215225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm1, xmm4 215325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm4, [eax + esi + 32] 215425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm2, xmm4 215525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu xmm4, [eax + esi + 48] 215625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm3, xmm4 215725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea eax, [eax + 64] 215825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, xmm0 215925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm0, xmm1, 0x88 216025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm4, xmm1, 0xdd 216125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm0, xmm4 216225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm4, xmm2 216325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm2, xmm3, 0x88 216425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com shufps xmm4, xmm3, 0xdd 216525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pavgb xmm2, xmm4 216625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 216725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // step 2 - convert to U and V 216825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // from here down is very similar to Y code except 216925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // instead of 16 different pixels, its 8 pixels of U and 8 of V 217025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm1, xmm0 217125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm3, xmm2 217225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm0, xmm7 // U 217325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm2, xmm7 
217425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm1, xmm6 // V 217525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pmaddubsw xmm3, xmm6 217625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm0, xmm2 217725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com phaddw xmm1, xmm3 217825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psraw xmm0, 8 217925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com psraw xmm1, 8 218025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com packsswb xmm0, xmm1 218125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com paddb xmm0, xmm5 // -> unsigned 218225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 218325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // step 3 - store 8 U and 8 V values 218425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub ecx, 16 218525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movlps qword ptr [edx], xmm0 // U 218625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movhps qword ptr [edx + edi], xmm0 // V 218725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 8] 218825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 218925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 219025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop edi 219125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop esi 219225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 219325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 219425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 21954c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com#endif // HAS_ARGBTOYROW_SSSE3 2196585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 2197c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com#ifdef HAS_I422TOARGBROW_AVX2 
2198c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2199851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec8 kUVToB_AVX = { 2200c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, 2201c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB 2202c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com}; 2203851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec8 kUVToR_AVX = { 2204c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, 2205c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR 2206c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com}; 2207851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec8 kUVToG_AVX = { 2208c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, 2209c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG 2210c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com}; 2211851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec16 kYToRgb_AVX = { 22122b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG 22132b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com}; 2214851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec16 kYSub16_AVX = { 22152b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 22162b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com}; 2217851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const 
lvec16 kUVBiasB_AVX = { 22182b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB 22192b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com}; 2220851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec16 kUVBiasG_AVX = { 22212b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG 22222b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com}; 2223851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const lvec16 kUVBiasR_AVX = { 22242b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR 22252b115a5237a8133a2eff060880f29198adf35eecfbarchard@google.com}; 2226c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2227c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com// 16 pixels 2228c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
2229c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com__declspec(naked) __declspec(align(16)) 2230c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.comvoid I422ToARGBRow_AVX2(const uint8* y_buf, 2231e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* u_buf, 2232e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com const uint8* v_buf, 2233e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com uint8* dst_argb, 2234e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com int width) { 2235c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com __asm { 2236c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com push esi 2237c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com push edi 2238c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com mov eax, [esp + 8 + 4] // Y 2239c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com mov esi, [esp + 8 + 8] // U 2240c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com mov edi, [esp + 8 + 12] // V 2241c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com mov edx, [esp + 8 + 16] // argb 2242c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com mov ecx, [esp + 8 + 20] // width 2243c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com sub edi, esi 2244c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2245c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpxor ymm4, ymm4, ymm4 2246c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2247c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2248c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com convertloop: 2249c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vmovq xmm0, qword ptr [esi] // U 2250c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vmovq xmm1, qword ptr [esi + edi] // V 
2251cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 8] 2252cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpunpcklbw ymm0, ymm0, ymm1 // UV 2253c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpermq ymm0, ymm0, 0xd8 2254c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpunpcklwd ymm0, ymm0, ymm0 // UVUV 2255cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpmaddubsw ymm2, ymm0, kUVToB_AVX // scale B UV 2256cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpmaddubsw ymm1, ymm0, kUVToG_AVX // scale G UV 2257cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpmaddubsw ymm0, ymm0, kUVToR_AVX // scale R UV 2258cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsubw ymm2, ymm2, kUVBiasB_AVX // unbias back to signed 2259cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsubw ymm1, ymm1, kUVBiasG_AVX 2260cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsubw ymm0, ymm0, kUVBiasR_AVX 2261cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 2262cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Step 2: Find Y contribution to 16 R,G,B values 2263cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vmovdqu xmm3, [eax] // NOLINT 2264cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea eax, [eax + 16] 2265c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpermq ymm3, ymm3, 0xd8 2266c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpunpcklbw ymm3, ymm3, ymm4 2267cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsubsw ymm3, ymm3, kYSub16_AVX 2268cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpmullw ymm3, ymm3, kYToRgb_AVX 2269cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpaddsw ymm2, ymm2, ymm3 // B += Y 2270cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpaddsw ymm1, ymm1, ymm3 // G += Y 
2271cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpaddsw ymm0, ymm0, ymm3 // R += Y 2272cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsraw ymm2, ymm2, 6 2273cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsraw ymm1, ymm1, 6 2274cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpsraw ymm0, ymm0, 6 2275c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpackuswb ymm2, ymm2, ymm2 // B 2276c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpackuswb ymm1, ymm1, ymm1 // G 2277cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpackuswb ymm0, ymm0, ymm0 // R 2278c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2279c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com // Step 3: Weave into ARGB 2280c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpunpcklbw ymm2, ymm2, ymm1 // BG 2281c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpermq ymm2, ymm2, 0xd8 2282c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpunpcklbw ymm0, ymm0, ymm5 // RA 2283c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vpermq ymm0, ymm0, 0xd8 2284cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpunpcklwd ymm1, ymm2, ymm0 // BGRA first 8 pixels 2285cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com vpunpckhwd ymm2, ymm2, ymm0 // BGRA next 8 pixels 2286c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vmovdqu [edx], ymm1 2287c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vmovdqu [edx + 32], ymm2 2288c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com lea edx, [edx + 64] 2289c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com sub ecx, 16 2290c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com jg convertloop 2291c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com vzeroupper 2292c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 
2293c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com pop edi 2294c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com pop esi 2295c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com ret 2296c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com } 2297c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com} 2298c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com#endif // HAS_I422TOARGBROW_AVX2 2299c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2300c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com#ifdef HAS_I422TOARGBROW_SSSE3 2301c297d103f199dc8c9565ea0f35bdb0832a9d10b8fbarchard@google.com 2302c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// TODO(fbarchard): Read that does half size on Y and treats 420 as 444. 2303e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 230447e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com// Read 8 UV from 444. 2305b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define READYUV444 __asm { \ 2306bac5f2c3ee12535817448e606c7a7704bbae8321fbarchard@google.com __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \ 2307bac5f2c3ee12535817448e606c7a7704bbae8321fbarchard@google.com __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \ 2308e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm lea esi, [esi + 8] \ 2309e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm punpcklbw xmm0, xmm1 /* UV */ \ 2310e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com } 2311e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2312c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Read 4 UV from 422, upsample to 8 UV. 
2313b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define READYUV422 __asm { \ 2314d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm movd xmm0, [esi] /* U */ \ 2315d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm movd xmm1, [esi + edi] /* V */ \ 2316d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm lea esi, [esi + 4] \ 2317d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm punpcklbw xmm0, xmm1 /* UV */ \ 2318d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2319d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 2320d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2321c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Read 2 UV from 411, upsample to 8 UV. 2322b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define READYUV411 __asm { \ 23230d19fc5ed37530b1feae839db7b9d1242a9f407ffbarchard@google.com __asm movzx ebx, word ptr [esi] /* U */ /* NOLINT */ \ 232447e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com __asm movd xmm0, ebx \ 23250d19fc5ed37530b1feae839db7b9d1242a9f407ffbarchard@google.com __asm movzx ebx, word ptr [esi + edi] /* V */ /* NOLINT */ \ 232647e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com __asm movd xmm1, ebx \ 2327e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm lea esi, [esi + 2] \ 2328e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm punpcklbw xmm0, xmm1 /* UV */ \ 2329e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2330e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \ 23314c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com } 23324c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com 2333c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Read 4 UV from 
NV12, upsample to 8 UV. 2334b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define READNV12 __asm { \ 2335bac5f2c3ee12535817448e606c7a7704bbae8321fbarchard@google.com __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \ 23362d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm lea esi, [esi + 8] \ 23372d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 23382d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 23392d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2340c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Convert 8 pixels: 8 UV and 8 Y. 2341b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define YUVTORGB __asm { \ 23424c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ 2343e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm movdqa xmm1, xmm0 \ 2344e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm movdqa xmm2, xmm0 \ 2345e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \ 2346e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \ 2347e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \ 2348e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \ 2349e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psubw xmm1, kUVBiasG \ 2350e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psubw xmm2, kUVBiasR \ 2351e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com /* Step 2: Find Y contribution to 8 R,G,B values */ \ 2352e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm movq xmm3, qword ptr [eax] /* NOLINT */ \ 
2353e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm lea eax, [eax + 8] \ 2354e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm punpcklbw xmm3, xmm4 \ 2355e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psubsw xmm3, kYSub16 \ 2356e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm pmullw xmm3, kYToRgb \ 2357e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm paddsw xmm0, xmm3 /* B += Y */ \ 2358e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm paddsw xmm1, xmm3 /* G += Y */ \ 2359e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm paddsw xmm2, xmm3 /* R += Y */ \ 2360e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psraw xmm0, 6 \ 2361e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psraw xmm1, 6 \ 2362e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm psraw xmm2, 6 \ 2363e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm packuswb xmm0, xmm0 /* B */ \ 2364e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm packuswb xmm1, xmm1 /* G */ \ 2365e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm packuswb xmm2, xmm2 /* R */ \ 2366e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com } 2367e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2368c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Convert 8 pixels: 8 VU and 8 Y. 
2369b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#define YVUTORGB __asm { \ 23702d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ 23712d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm movdqa xmm1, xmm0 \ 23722d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm movdqa xmm2, xmm0 \ 23732d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm pmaddubsw xmm0, kVUToB /* scale B UV */ \ 23742d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm pmaddubsw xmm1, kVUToG /* scale G UV */ \ 23752d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm pmaddubsw xmm2, kVUToR /* scale R UV */ \ 23762d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \ 23772d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psubw xmm1, kUVBiasG \ 23782d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psubw xmm2, kUVBiasR \ 23792d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com /* Step 2: Find Y contribution to 8 R,G,B values */ \ 23802d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm movq xmm3, qword ptr [eax] /* NOLINT */ \ 23812d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm lea eax, [eax + 8] \ 23822d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm punpcklbw xmm3, xmm4 \ 23832d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psubsw xmm3, kYSub16 \ 23842d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm pmullw xmm3, kYToRgb \ 23852d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm paddsw xmm0, xmm3 /* B += Y */ \ 23862d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm paddsw xmm1, xmm3 /* G += Y */ \ 23872d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm paddsw xmm2, xmm3 /* R += Y */ \ 
23882d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psraw xmm0, 6 \ 23892d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psraw xmm1, 6 \ 23902d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm psraw xmm2, 6 \ 23912d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm packuswb xmm0, xmm0 /* B */ \ 23922d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm packuswb xmm1, xmm1 /* G */ \ 23932d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm packuswb xmm2, xmm2 /* R */ \ 23942d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 23952d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2396e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, dest aligned 16. 2397c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). 2398d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 2399e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I444ToARGBRow_SSSE3(const uint8* y_buf, 2400e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* u_buf, 2401e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* v_buf, 2402bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2403e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2404d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm { 2405d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push esi 2406d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push edi 2407d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2408d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov esi, [esp + 8 + 8] // U 2409d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov edi, [esp + 8 + 12] // V 
2410e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // argb 2411d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov ecx, [esp + 8 + 20] // width 2412d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub edi, esi 2413d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2414d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pxor xmm4, xmm4 2415d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2416c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2417eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 24184c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV444 24194c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 2420d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2421d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com // Step 3: Weave into ARGB 2422d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2423d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpcklbw xmm2, xmm5 // RA 2424d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com movdqa xmm1, xmm0 2425d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2426d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 24273fe369661abbd1bbca12bd69dc8be0be9a5f9792fbarchard@google.com movdqa [edx], xmm0 2428d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com movdqa [edx + 16], xmm1 2429d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com lea edx, [edx + 32] 2430d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub ecx, 8 243118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 2432d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 
2433d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop edi 2434d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop esi 2435d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com ret 2436d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 2437d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com} 2438d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2439e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, dest aligned 16. 2440c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2441d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 2442827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.comvoid I422ToRGB24Row_SSSE3(const uint8* y_buf, 2443827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com const uint8* u_buf, 2444827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com const uint8* v_buf, 2445bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgb24, 2446827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com int width) { 2447827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com __asm { 2448827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com push esi 2449827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com push edi 2450827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2451827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov esi, [esp + 8 + 8] // U 2452827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov edi, [esp + 8 + 12] // V 2453827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov edx, [esp + 8 + 16] // rgb24 2454827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov ecx, [esp + 8 + 20] // width 2455827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com sub edi, esi 
2456827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pxor xmm4, xmm4 2457827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm5, kShuffleMaskARGBToRGB24_0 2458827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm6, kShuffleMaskARGBToRGB24 2459827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2460c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2461827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com convertloop: 2462827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com READYUV422 2463827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com YUVTORGB 2464827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2465827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com // Step 3: Weave into RRGB 2466827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2467827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklbw xmm2, xmm2 // RR 2468827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm1, xmm0 2469827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklwd xmm0, xmm2 // BGRR first 4 pixels 2470827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpckhwd xmm1, xmm2 // BGRR next 4 pixels 2471827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes. 2472827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pshufb xmm1, xmm6 // Pack into first 12 bytes. 2473827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1 2474827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movq qword ptr [edx], xmm0 // First 8 bytes 2475827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels. 
2476827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com lea edx, [edx + 24] 2477827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com sub ecx, 8 2478827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com jg convertloop 2479827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2480827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pop edi 2481827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pop esi 2482827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com ret 2483827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com } 2484827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com} 2485827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2486827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com// 8 pixels, dest aligned 16. 2487827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2488827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com__declspec(naked) __declspec(align(16)) 2489827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.comvoid I422ToRAWRow_SSSE3(const uint8* y_buf, 2490827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com const uint8* u_buf, 2491827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com const uint8* v_buf, 2492bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_raw, 2493827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com int width) { 2494827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com __asm { 2495827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com push esi 2496827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com push edi 2497827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2498827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov esi, [esp + 8 + 8] // U 2499827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov edi, [esp + 8 + 12] // V 
2500827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov edx, [esp + 8 + 16] // raw 2501827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com mov ecx, [esp + 8 + 20] // width 2502827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com sub edi, esi 2503827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pxor xmm4, xmm4 2504827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm5, kShuffleMaskARGBToRAW_0 2505827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm6, kShuffleMaskARGBToRAW 2506827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2507c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2508827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com convertloop: 2509827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com READYUV422 2510827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com YUVTORGB 2511827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2512827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com // Step 3: Weave into RRGB 2513827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2514827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklbw xmm2, xmm2 // RR 2515827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqa xmm1, xmm0 2516827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpcklwd xmm0, xmm2 // BGRR first 4 pixels 2517827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com punpckhwd xmm1, xmm2 // BGRR next 4 pixels 2518827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes. 2519827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pshufb xmm1, xmm6 // Pack into first 12 bytes. 
2520827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1 2521827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movq qword ptr [edx], xmm0 // First 8 bytes 2522827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels. 2523827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com lea edx, [edx + 24] 2524827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com sub ecx, 8 2525827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com jg convertloop 2526827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2527827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pop edi 2528827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com pop esi 2529827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com ret 2530827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com } 2531827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com} 2532827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com 2533af1aa56f0b4a5fcac6c36a1a0c02b6917f2c14f2fbarchard@google.com// 8 pixels, dest unaligned. 2534827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
2535827de16bb1fa9fc5cb7237a8c32378cc3e30ae2dfbarchard@google.com__declspec(naked) __declspec(align(16)) 253615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.comvoid I422ToRGB565Row_SSSE3(const uint8* y_buf, 253715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com const uint8* u_buf, 253815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com const uint8* v_buf, 253915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com uint8* rgb565_buf, 254015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com int width) { 254115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com __asm { 254215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com push esi 254315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com push edi 254415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com mov eax, [esp + 8 + 4] // Y 254515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com mov esi, [esp + 8 + 8] // U 254615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com mov edi, [esp + 8 + 12] // V 254715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com mov edx, [esp + 8 + 16] // rgb565 254815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com mov ecx, [esp + 8 + 20] // width 254915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com sub edi, esi 255015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pxor xmm4, xmm4 255115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x0000001f 255215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm5, 27 255315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 255415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm6, 26 255515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pslld xmm6, 5 255615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 
255715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pslld xmm7, 11 255815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 2559c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 256015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com convertloop: 256115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com READYUV422 256215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com YUVTORGB 256315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 256415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com // Step 3: Weave into RRGB 256515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com punpcklbw xmm0, xmm1 // BG 256615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com punpcklbw xmm2, xmm2 // RR 256715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movdqa xmm1, xmm0 256815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com punpcklwd xmm0, xmm2 // BGRR first 4 pixels 256915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com punpckhwd xmm1, xmm2 // BGRR next 4 pixels 257015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 257115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com // Step 3b: RRGB -> RGB565 257215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movdqa xmm3, xmm0 // B first 4 pixels of argb 257315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movdqa xmm2, xmm0 // G 257415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pslld xmm0, 8 // R 257515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm3, 3 // B 257615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm2, 5 // G 257715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrad xmm0, 16 // R 257815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm3, xmm5 // B 257915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm2, xmm6 // G 
258015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm0, xmm7 // R 258115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com por xmm3, xmm2 // BG 258215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com por xmm0, xmm3 // BGR 258315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movdqa xmm3, xmm1 // B next 4 pixels of argb 258415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com movdqa xmm2, xmm1 // G 258515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pslld xmm1, 8 // R 258615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm3, 3 // B 258715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrld xmm2, 5 // G 258815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com psrad xmm1, 16 // R 258915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm3, xmm5 // B 259015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm2, xmm6 // G 259115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pand xmm1, xmm7 // R 259215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com por xmm3, xmm2 // BG 259315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com por xmm1, xmm3 // BGR 259415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com packssdw xmm0, xmm1 259515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com sub ecx, 8 2596af1aa56f0b4a5fcac6c36a1a0c02b6917f2c14f2fbarchard@google.com movdqu [edx], xmm0 // store 8 pixels of RGB565 259715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com lea edx, [edx + 16] 259815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com jg convertloop 259915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 260015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pop edi 260115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com pop esi 260215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com ret 
260315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com } 260415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com} 260515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 260615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com// 8 pixels, dest aligned 16. 260715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 260815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com__declspec(naked) __declspec(align(16)) 2609e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I422ToARGBRow_SSSE3(const uint8* y_buf, 2610e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* u_buf, 2611e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* v_buf, 2612bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2613e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2614d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm { 2615d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push esi 2616d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push edi 2617d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2618d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov esi, [esp + 8 + 8] // U 2619d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov edi, [esp + 8 + 12] // V 2620e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // argb 2621d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov ecx, [esp + 8 + 20] // width 2622d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub edi, esi 2623e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2624d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pxor xmm4, xmm4 
2625d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2626c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2627eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 26284c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 26294c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 2630d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2631e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com // Step 3: Weave into ARGB 2632e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2633e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm2, xmm5 // RA 2634e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa xmm1, xmm0 2635e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2636e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 2637e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx], xmm0 2638e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx + 16], xmm1 2639d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com lea edx, [edx + 32] 2640d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub ecx, 8 264118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 2642d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2643d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop edi 2644d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop esi 2645d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com ret 2646d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 2647d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com} 2648d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2649e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, dest aligned 16. 
2650c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2651e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// Similar to I420 but duplicate UV once more. 2652d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 2653e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I411ToARGBRow_SSSE3(const uint8* y_buf, 2654e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* u_buf, 2655e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com const uint8* v_buf, 2656bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2657e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2658d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm { 265947e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com push ebx 2660d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push esi 2661d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push edi 266247e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov eax, [esp + 12 + 4] // Y 266347e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov esi, [esp + 12 + 8] // U 266447e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov edi, [esp + 12 + 12] // V 266547e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov edx, [esp + 12 + 16] // argb 266647e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov ecx, [esp + 12 + 20] // width 2667d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub edi, esi 2668d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2669d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pxor xmm4, xmm4 2670d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2671c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 
2672eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 267347e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com READYUV411 // modifies EBX 26744c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 2675d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2676d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com // Step 3: Weave into ARGB 2677e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2678e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm2, xmm5 // RA 2679e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa xmm1, xmm0 2680e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2681e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 2682e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx], xmm0 2683d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com movdqa [edx + 16], xmm1 2684d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com lea edx, [edx + 32] 2685d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub ecx, 8 268618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 2687d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 2688d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop edi 2689d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop esi 269047e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com pop ebx 2691d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com ret 2692d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 2693d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com} 2694d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 26952d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com// 8 pixels, dest aligned 16. 
2696c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 26972d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 26982d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.comvoid NV12ToARGBRow_SSSE3(const uint8* y_buf, 26992d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com const uint8* uv_buf, 2700bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 27012d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com int width) { 27022d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm { 27032d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com push esi 27042d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov eax, [esp + 4 + 4] // Y 27052d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov esi, [esp + 4 + 8] // UV 27062d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov edx, [esp + 4 + 12] // argb 27072d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 27082d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 27092d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pxor xmm4, xmm4 27102d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2711c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 27122d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com convertloop: 27132d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com READNV12 27142d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com YUVTORGB 27152d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 27162d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com // Step 3: Weave into ARGB 27172d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm0, xmm1 // BG 
27182d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm2, xmm5 // RA 27192d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa xmm1, xmm0 27202d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 27212d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 27222d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa [edx], xmm0 27232d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa [edx + 16], xmm1 27242d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com lea edx, [edx + 32] 27252d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com sub ecx, 8 27262d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com jg convertloop 27272d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 27282d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pop esi 27292d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com ret 27302d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 27312d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com} 27322d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 27332d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com// 8 pixels, dest aligned 16. 2734c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
27352d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 27362d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.comvoid NV21ToARGBRow_SSSE3(const uint8* y_buf, 27372d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com const uint8* uv_buf, 2738bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 27392d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com int width) { 27402d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm { 27412d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com push esi 27422d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov eax, [esp + 4 + 4] // Y 27432d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov esi, [esp + 4 + 8] // VU 27442d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov edx, [esp + 4 + 12] // argb 27452d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 27462d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 27472d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pxor xmm4, xmm4 27482d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2749c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 27502d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com convertloop: 27512d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com READNV12 27522d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com YVUTORGB 27532d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 27542d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com // Step 3: Weave into ARGB 27552d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm0, xmm1 // BG 27562d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm2, xmm5 // RA 27572d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa xmm1, xmm0 
27582d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 27592d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 27602d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa [edx], xmm0 27612d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa [edx + 16], xmm1 27622d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com lea edx, [edx + 32] 27632d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com sub ecx, 8 27642d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com jg convertloop 27652d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 27662d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pop esi 27672d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com ret 27682d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 27692d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com} 27702d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2771e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, unaligned. 2772c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). 
2773d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 2774e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 2775952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com const uint8* u_buf, 2776952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com const uint8* v_buf, 2777bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2778952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com int width) { 2779952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com __asm { 2780952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push esi 2781952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push edi 2782952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov eax, [esp + 8 + 4] // Y 2783952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov esi, [esp + 8 + 8] // U 2784952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov edi, [esp + 8 + 12] // V 2785e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // argb 2786952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov ecx, [esp + 8 + 20] // width 2787952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub edi, esi 2788952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2789952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pxor xmm4, xmm4 2790952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 2791c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2792952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com convertloop: 27934c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV444 27944c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 2795952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 
2796952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com // Step 3: Weave into ARGB 2797952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm0, xmm1 // BG 2798952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm2, xmm5 // RA 2799952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com movdqa xmm1, xmm0 2800952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2801952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 28024c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx], xmm0 28034c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx + 16], xmm1 2804e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com lea edx, [edx + 32] 2805e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub ecx, 8 2806e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com jg convertloop 2807e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2808e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop edi 2809e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop esi 2810e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com ret 2811e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com } 2812e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com} 2813e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2814e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, unaligned. 2815c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
2816e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com__declspec(naked) __declspec(align(16)) 2817e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 2818e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* u_buf, 2819e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* v_buf, 2820bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2821e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com int width) { 2822e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm { 2823e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push esi 2824e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push edi 2825e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2826e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov esi, [esp + 8 + 8] // U 2827e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edi, [esp + 8 + 12] // V 2828e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // argb 2829e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov ecx, [esp + 8 + 20] // width 2830e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub edi, esi 2831e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2832e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pxor xmm4, xmm4 2833e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2834c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2835e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com convertloop: 28364c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 28374c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 2838e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 
2839e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com // Step 3: Weave into ARGB 2840e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2841e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm2, xmm5 // RA 2842e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa xmm1, xmm0 2843e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2844e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 28454c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx], xmm0 28464c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx + 16], xmm1 2847952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com lea edx, [edx + 32] 2848952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub ecx, 8 2849952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com jg convertloop 2850952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 2851952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop edi 2852952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop esi 2853952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com ret 2854952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com } 2855952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com} 2856952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 2857e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// 8 pixels, unaligned. 2858c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2859e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com// Similar to I420 but duplicate UV once more. 
2860d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 2861e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 2862952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com const uint8* u_buf, 2863952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com const uint8* v_buf, 2864bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 2865952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com int width) { 2866952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com __asm { 286747e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com push ebx 2868952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push esi 2869952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push edi 287047e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov eax, [esp + 12 + 4] // Y 287147e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov esi, [esp + 12 + 8] // U 287247e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov edi, [esp + 12 + 12] // V 287347e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov edx, [esp + 12 + 16] // argb 287447e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com mov ecx, [esp + 12 + 20] // width 2875e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub edi, esi 2876e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2877e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pxor xmm4, xmm4 2878e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2879c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2880e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com convertloop: 288147e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com READYUV411 // modifies EBX 28824c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 
2883e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2884e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com // Step 3: Weave into ARGB 2885e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm0, xmm1 // BG 2886e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklbw xmm2, xmm5 // RA 2887e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa xmm1, xmm0 2888e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 2889e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 28904c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx], xmm0 28914c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com movdqu [edx + 16], xmm1 2892e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com lea edx, [edx + 32] 2893e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub ecx, 8 2894e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com jg convertloop 2895e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 2896e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop edi 2897e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop esi 289847e856c632f0a310004601b86493220a6993d7b4fbarchard@google.com pop ebx 2899e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com ret 2900e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com } 2901e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com} 2902e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 290315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com// 8 pixels, dest aligned 16. 290415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
290515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com__declspec(naked) __declspec(align(16)) 29062d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.comvoid NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 29072d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com const uint8* uv_buf, 2908bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 29092d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com int width) { 29102d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm { 29112d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com push esi 29122d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov eax, [esp + 4 + 4] // Y 29132d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov esi, [esp + 4 + 8] // UV 29142d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov edx, [esp + 4 + 12] // argb 29152d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 29162d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 29172d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pxor xmm4, xmm4 29182d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2919c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 29202d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com convertloop: 29212d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com READNV12 29222d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com YUVTORGB 29232d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 29242d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com // Step 3: Weave into ARGB 29252d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm0, xmm1 // BG 29262d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm2, xmm5 // RA 29272d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa xmm1, xmm0 
29282d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 29292d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 29302d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqu [edx], xmm0 29312d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqu [edx + 16], xmm1 29322d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com lea edx, [edx + 32] 29332d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com sub ecx, 8 29342d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com jg convertloop 29352d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 29362d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pop esi 29372d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com ret 29382d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 29392d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com} 29402d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 29412d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com// 8 pixels, dest aligned 16. 2942c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
29432d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 29442d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.comvoid NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, 29452d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com const uint8* uv_buf, 2946bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_argb, 29472d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com int width) { 29482d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com __asm { 29492d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com push esi 29502d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov eax, [esp + 4 + 4] // Y 29512d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov esi, [esp + 4 + 8] // VU 29522d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov edx, [esp + 4 + 12] // argb 29532d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 29542d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 29552d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pxor xmm4, xmm4 29562d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2957c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 29582d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com convertloop: 29592d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com READNV12 29602d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com YVUTORGB 29612d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 29622d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com // Step 3: Weave into ARGB 29632d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm0, xmm1 // BG 29642d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklbw xmm2, xmm5 // RA 29652d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqa xmm1, xmm0 
29662d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpcklwd xmm0, xmm2 // BGRA first 4 pixels 29672d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com punpckhwd xmm1, xmm2 // BGRA next 4 pixels 29682d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqu [edx], xmm0 29692d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com movdqu [edx + 16], xmm1 29702d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com lea edx, [edx + 32] 29712d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com sub ecx, 8 29722d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com jg convertloop 29732d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 29742d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com pop esi 29752d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com ret 29762d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com } 29772d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com} 29782d9fe08225ab28f62b515b2b914accc6a7b060fbfbarchard@google.com 2979e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com__declspec(naked) __declspec(align(16)) 2980e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I422ToBGRARow_SSSE3(const uint8* y_buf, 2981e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* u_buf, 2982e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* v_buf, 2983bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_bgra, 2984e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com int width) { 2985e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm { 2986e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push esi 2987e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push edi 2988e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov eax, [esp + 8 + 4] // Y 2989e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov esi, [esp + 8 + 8] // U 
2990e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edi, [esp + 8 + 12] // V 2991e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // bgra 2992952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov ecx, [esp + 8 + 20] // width 2993952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub edi, esi 2994952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pxor xmm4, xmm4 2995952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 2996c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 2997952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com convertloop: 29984c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 29994c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 3000952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3001952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com // Step 3: Weave into BGRA 3002952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3003952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm1, xmm0 // GB 3004952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm5, xmm2 // AR 3005952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com movdqa xmm0, xmm5 3006952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklwd xmm5, xmm1 // BGRA first 4 pixels 3007952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpckhwd xmm0, xmm1 // BGRA next 4 pixels 3008e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx], xmm5 3009e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx + 16], xmm0 3010952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com lea edx, [edx + 32] 3011952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub ecx, 8 3012952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com jg convertloop 
3013952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3014952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop edi 3015952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop esi 3016952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com ret 3017952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com } 3018952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com} 3019952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3020d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 302125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, 302225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com const uint8* u_buf, 302325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com const uint8* v_buf, 3024bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_bgra, 302525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com int width) { 302625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com __asm { 302725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push esi 302825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push edi 302925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 8 + 4] // Y 303025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov esi, [esp + 8 + 8] // U 303125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edi, [esp + 8 + 12] // V 303225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 16] // bgra 303325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 8 + 20] // width 303425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub edi, esi 303525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pxor xmm4, xmm4 303625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 
3037c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 303825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 303925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com READYUV422 304025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com YUVTORGB 304125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 304225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // Step 3: Weave into BGRA 304325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 304425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm1, xmm0 // GB 304525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm5, xmm2 // AR 304625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm0, xmm5 304725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklwd xmm5, xmm1 // BGRA first 4 pixels 304825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpckhwd xmm0, xmm1 // BGRA next 4 pixels 304925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx], xmm5 305025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx + 16], xmm0 305125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 32] 305225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub ecx, 8 305325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 305425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 305525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop edi 305625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop esi 305725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 305825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 305925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 306025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 
306125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 3062e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.comvoid I422ToABGRRow_SSSE3(const uint8* y_buf, 3063e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* u_buf, 3064e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* v_buf, 3065bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_abgr, 3066e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com int width) { 3067952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com __asm { 3068952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push esi 3069952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com push edi 3070952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov eax, [esp + 8 + 4] // Y 3071952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov esi, [esp + 8 + 8] // U 3072952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov edi, [esp + 8 + 12] // V 3073e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edx, [esp + 8 + 16] // abgr 3074952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com mov ecx, [esp + 8 + 20] // width 3075952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub edi, esi 3076952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3077952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pxor xmm4, xmm4 3078952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3079c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3080952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com convertloop: 30814c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 30824c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 3083952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 
3084952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com // Step 3: Weave into ARGB 3085952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm2, xmm1 // RG 3086952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklbw xmm0, xmm5 // BA 3087952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com movdqa xmm1, xmm2 3088952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpcklwd xmm2, xmm0 // RGBA first 4 pixels 3089952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com punpckhwd xmm1, xmm0 // RGBA next 4 pixels 3090e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx], xmm2 3091e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa [edx + 16], xmm1 3092952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com lea edx, [edx + 32] 3093952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com sub ecx, 8 3094952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com jg convertloop 3095952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3096952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop edi 3097952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com pop esi 3098952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com ret 3099952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com } 3100952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com} 3101952a507ca6967558c2ae773321e003b6f2bb943afbarchard@google.com 3102d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 310325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, 3104e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* u_buf, 3105e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* v_buf, 3106bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_abgr, 
3107e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com int width) { 3108d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm { 3109d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push esi 3110d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com push edi 3111d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov eax, [esp + 8 + 4] // Y 3112d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov esi, [esp + 8 + 8] // U 3113d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov edi, [esp + 8 + 12] // V 311425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 16] // abgr 3115d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov ecx, [esp + 8 + 20] // width 3116d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub edi, esi 311725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3118d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pxor xmm4, xmm4 3119d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3120c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3121eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 31224c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 31234c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 3124e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 312525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // Step 3: Weave into ARGB 312625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm2, xmm1 // RG 312725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm0, xmm5 // BA 312825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm1, xmm2 312925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklwd xmm2, xmm0 // RGBA first 4 pixels 
313025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpckhwd xmm1, xmm0 // RGBA next 4 pixels 313125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx], xmm2 313225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx + 16], xmm1 313325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com lea edx, [edx + 32] 313425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub ecx, 8 313525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com jg convertloop 313625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 313725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop edi 313825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pop esi 313925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com ret 314025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com } 314125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com} 314225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 314325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com__declspec(naked) __declspec(align(16)) 314425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid I422ToRGBARow_SSSE3(const uint8* y_buf, 314525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com const uint8* u_buf, 314625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com const uint8* v_buf, 3147bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgba, 314825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com int width) { 314925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com __asm { 315025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push esi 315125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com push edi 315225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov eax, [esp + 8 + 4] // Y 315325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov esi, [esp + 8 + 8] // U 
315425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edi, [esp + 8 + 12] // V 315525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 16] // rgba 315625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov ecx, [esp + 8 + 20] // width 315725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com sub edi, esi 315825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pxor xmm4, xmm4 315925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 3160c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 316125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com convertloop: 316225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com READYUV422 316325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com YUVTORGB 316425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 316525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // Step 3: Weave into RGBA 3166e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 316725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm1, xmm2 // GR 316825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm5, xmm0 // AB 3169e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com movdqa xmm0, xmm5 317025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklwd xmm5, xmm1 // RGBA first 4 pixels 317125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpckhwd xmm0, xmm1 // RGBA next 4 pixels 317225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa [edx], xmm5 317325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa [edx + 16], xmm0 3174e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com lea edx, [edx + 32] 3175e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub ecx, 8 3176e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com jg convertloop 
3177e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 3178e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop edi 3179e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pop esi 3180e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com ret 3181e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com } 3182e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com} 3183e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 3184e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com__declspec(naked) __declspec(align(16)) 318525dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.comvoid I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, 3186e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* u_buf, 3187e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com const uint8* v_buf, 3188bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgba, 3189e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com int width) { 3190e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com __asm { 3191e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push esi 3192e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com push edi 3193e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov eax, [esp + 8 + 4] // Y 3194e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov esi, [esp + 8 + 8] // U 3195e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov edi, [esp + 8 + 12] // V 319625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com mov edx, [esp + 8 + 16] // rgba 3197e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com mov ecx, [esp + 8 + 20] // width 3198e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub edi, esi 3199e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com pxor xmm4, xmm4 3200e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com 
3201c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3202e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com convertloop: 32034c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com READYUV422 32044c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com YUVTORGB 3205d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 320625dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com // Step 3: Weave into RGBA 320725dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 320825dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm1, xmm2 // GR 320925dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklbw xmm5, xmm0 // AB 321025dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqa xmm0, xmm5 321125dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpcklwd xmm5, xmm1 // RGBA first 4 pixels 321225dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com punpckhwd xmm0, xmm1 // RGBA next 4 pixels 321325dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx], xmm5 321425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com movdqu [edx + 16], xmm0 3215e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com lea edx, [edx + 32] 3216e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com sub ecx, 8 321718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 3218d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3219d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop edi 3220d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com pop esi 3221d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com ret 3222d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 3223d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com} 322425dc05858e39843299cb66715bdd4e3edd2f89a6fbarchard@google.com 
3225e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com#endif // HAS_I422TOARGBROW_SSSE3 3226d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3227e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com#ifdef HAS_YTOARGBROW_SSE2 3228d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 3229e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid YToARGBRow_SSE2(const uint8* y_buf, 3230e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com uint8* rgb_buf, 3231e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 3232d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com __asm { 323330859f75f28c2435753d33eb7a48ccab169feb6dfbarchard@google.com pxor xmm5, xmm5 32348b9759c4a757ee5d1f005cfececd8382c357e5fefbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0xff000000 32358b9759c4a757ee5d1f005cfececd8382c357e5fefbarchard@google.com pslld xmm4, 24 323698a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com mov eax, 0x00100010 323798a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com movd xmm3, eax 323898a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshufd xmm3, xmm3, 0 323998a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com mov eax, 0x004a004a // 74 324098a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com movd xmm2, eax 324198a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshufd xmm2, xmm2,0 3242d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov eax, [esp + 4] // Y 3243d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov edx, [esp + 8] // rgb 3244d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com mov ecx, [esp + 12] // width 3245d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3246c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3247eaedc1d72735e68d45a0b42221a04902e648a21dfbarchard@google.com convertloop: 
3248d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 3249373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com movq xmm0, qword ptr [eax] 3250d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com lea eax, [eax + 8] 325130859f75f28c2435753d33eb7a48ccab169feb6dfbarchard@google.com punpcklbw xmm0, xmm5 // 0.Y 32528b9759c4a757ee5d1f005cfececd8382c357e5fefbarchard@google.com psubusw xmm0, xmm3 325330859f75f28c2435753d33eb7a48ccab169feb6dfbarchard@google.com pmullw xmm0, xmm2 325430859f75f28c2435753d33eb7a48ccab169feb6dfbarchard@google.com psrlw xmm0, 6 3255d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com packuswb xmm0, xmm0 // G 3256d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3257d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com // Step 2: Weave into ARGB 3258d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpcklbw xmm0, xmm0 // GG 3259d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com movdqa xmm1, xmm0 3260d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpcklwd xmm0, xmm0 // BGRA first 4 pixels 3261d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com punpckhwd xmm1, xmm1 // BGRA next 4 pixels 32628b9759c4a757ee5d1f005cfececd8382c357e5fefbarchard@google.com por xmm0, xmm4 32638b9759c4a757ee5d1f005cfececd8382c357e5fefbarchard@google.com por xmm1, xmm4 32643fe369661abbd1bbca12bd69dc8be0be9a5f9792fbarchard@google.com movdqa [edx], xmm0 3265d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com movdqa [edx + 16], xmm1 3266d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com lea edx, [edx + 32] 3267d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com sub ecx, 8 326818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 3269d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com 3270d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com ret 
3271d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com } 3272d93d4486eb6cefba07f4707db5cce5509dc0145dfbarchard@google.com} 3273e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com#endif // HAS_YTOARGBROW_SSE2 327412d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com 327542831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com#ifdef HAS_MIRRORROW_SSSE3 327612d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com// Shuffle table for reversing the bytes. 3277851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMirror = { 327812d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u 327912d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com}; 3280228bdc24e44264baf3402124aaa6d4d81c8896f5fbarchard@google.com 3281d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 328242831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.comvoid MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { 3283f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 328412d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com mov eax, [esp + 4] // src 328512d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com mov edx, [esp + 8] // dst 328612d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com mov ecx, [esp + 12] // width 328742831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com movdqa xmm5, kShuffleMirror 328812d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com lea eax, [eax - 16] 3289ba3aeed3b86dfae7bc0631c8bed9b50303318dcafbarchard@google.com 3290c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 32910e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com convertloop: 32920e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com movdqa xmm0, [eax + ecx] 329312d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com pshufb xmm0, 
xmm5 32940e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com sub ecx, 16 329512d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com movdqa [edx], xmm0 329612d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com lea edx, [edx + 16] 329718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 329812d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com ret 329912d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com } 330012d048335db029aa66396d2fc09be0612afe8b59fbarchard@google.com} 33014c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com#endif // HAS_MIRRORROW_SSSE3 3302585a126140be298e60a4daa26140ead0e94eaaa1fbarchard@google.com 33032007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com#ifdef HAS_MIRRORROW_AVX2 33042007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com// Shuffle table for reversing the bytes. 3305851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const ulvec8 kShuffleMirror_AVX2 = { 33062007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u, 33072007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u 33082007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com}; 33092007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com 33102007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com__declspec(naked) __declspec(align(16)) 33112007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.comvoid MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { 33122007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com __asm { 33132007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com mov eax, [esp + 4] // src 33142007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com mov edx, [esp + 8] // dst 33152007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com mov ecx, [esp + 12] // width 
33162007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com vmovdqa ymm5, kShuffleMirror_AVX2 33172007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com lea eax, [eax - 32] 33182007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com 3319c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 33202007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com convertloop: 33212007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com vmovdqu ymm0, [eax + ecx] 33222007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com vpshufb ymm0, ymm0, ymm5 33232007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com vpermq ymm0, ymm0, 0x4e // swap high and low halfs 33242007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com sub ecx, 32 33252007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com vmovdqu [edx], ymm0 33262007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com lea edx, [edx + 32] 33272007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com jg convertloop 33289b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 33292007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com ret 33302007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com } 33312007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com} 33322007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com#endif // HAS_MIRRORROW_AVX2 33332007dca6dcfee6828f604068d7d17842fbe5d646fbarchard@google.com 333442831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com#ifdef HAS_MIRRORROW_SSE2 33352d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3 333642831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com// version can not. 
3337d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 333842831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.comvoid MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { 3339f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 3340373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com mov eax, [esp + 4] // src 3341373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com mov edx, [esp + 8] // dst 3342373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com mov ecx, [esp + 12] // width 3343373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com lea eax, [eax - 16] 3344ba3aeed3b86dfae7bc0631c8bed9b50303318dcafbarchard@google.com 3345c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 33460e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com convertloop: 334742831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com movdqu xmm0, [eax + ecx] 33482d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa xmm1, xmm0 // swap bytes 3349373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com psllw xmm0, 8 3350373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com psrlw xmm1, 8 3351373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com por xmm0, xmm1 3352373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com pshuflw xmm0, xmm0, 0x1b // swap words 3353373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com pshufhw xmm0, xmm0, 0x1b 335445b9ef0f6a404fe416d7a04bbd6da13037f3716bfbarchard@google.com pshufd xmm0, xmm0, 0x4e // swap qwords 33550e6ce93c84f710e6a589c6c6edfe480ad0567f0cfbarchard@google.com sub ecx, 16 335642831e0aae4c786e40302ac03bf5d679796b5c3ffbarchard@google.com movdqu [edx], xmm0 3357373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com lea edx, [edx + 16] 335818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 3359373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com ret 
3360373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com } 3361373cdbdc58d6e7b7e4653840677ef01468607e84fbarchard@google.com} 33624c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com#endif // HAS_MIRRORROW_SSE2 3363e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 336416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com#ifdef HAS_MIRRORROW_UV_SSSE3 336516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com// Shuffle table for reversing the bytes of UV channels. 3366851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleMirrorUV = { 336716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u 336816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com}; 336916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 3370d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 3371bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, 337216a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com int width) { 337316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com __asm { 337416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com push edi 337516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com mov eax, [esp + 4 + 4] // src 337616a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 337716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 337816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com mov ecx, [esp + 4 + 16] // width 337916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com movdqa xmm1, kShuffleMirrorUV 338016a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com lea eax, [eax + ecx * 2 - 16] 338116a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com sub edi, edx 
338216a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 3383c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 338416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com convertloop: 338516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com movdqa xmm0, [eax] 338616a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com lea eax, [eax - 16] 338716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com pshufb xmm0, xmm1 338816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com sub ecx, 8 338916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com movlpd qword ptr [edx], xmm0 339016a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com movhpd qword ptr [edx + edi], xmm0 339116a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com lea edx, [edx + 8] 339218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 339316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 339416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com pop edi 339516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com ret 339616a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com } 339716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com} 33984c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com#endif // HAS_MIRRORROW_UV_SSSE3 339916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 340027d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com#ifdef HAS_ARGBMIRRORROW_SSSE3 340127d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com// Shuffle table for reversing the bytes. 
3402851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kARGBShuffleMirror = { 340327d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u 340427d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com}; 340527d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com 340627d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com__declspec(naked) __declspec(align(16)) 340727d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.comvoid ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { 3408f5f6fd2aa778c379c442a2210ddd8d2ee03e8eb6fbarchard@google.com __asm { 340927d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com mov eax, [esp + 4] // src 341027d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com mov edx, [esp + 8] // dst 341127d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com mov ecx, [esp + 12] // width 34129335518f4127167ee54b0872ab715c674be06005fbarchard@google.com lea eax, [eax - 16 + ecx * 4] // last 4 pixels. 
341327d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com movdqa xmm5, kARGBShuffleMirror 341427d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com 3415c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 341627d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com convertloop: 34179335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqa xmm0, [eax] 34189335518f4127167ee54b0872ab715c674be06005fbarchard@google.com lea eax, [eax - 16] 341927d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com pshufb xmm0, xmm5 342027d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com sub ecx, 4 342127d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com movdqa [edx], xmm0 342227d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com lea edx, [edx + 16] 342327d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com jg convertloop 342427d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com ret 342527d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com } 342627d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com} 342727d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com#endif // HAS_ARGBMIRRORROW_SSSE3 342827d42c7ff6452c53643bc57ee8b7b17afbe8dfd0fbarchard@google.com 342951398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com#ifdef HAS_ARGBMIRRORROW_AVX2 343051398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com// Shuffle table for reversing the bytes. 
3431851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const ulvec32 kARGBShuffleMirror_AVX2 = { 343251398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u 343351398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com}; 343451398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com 343551398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com__declspec(naked) __declspec(align(16)) 343651398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.comvoid ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { 343751398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com __asm { 343851398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com mov eax, [esp + 4] // src 343951398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com mov edx, [esp + 8] // dst 344051398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com mov ecx, [esp + 12] // width 344151398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com lea eax, [eax - 32] 344251398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com vmovdqa ymm5, kARGBShuffleMirror_AVX2 344351398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com 3444c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 344551398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com convertloop: 344651398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order 344751398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com sub ecx, 8 344851398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com vmovdqu [edx], ymm0 344951398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com lea edx, [edx + 32] 345051398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com jg convertloop 34519b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 345251398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com ret 345351398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com } 
345451398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com} 345551398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com#endif // HAS_ARGBMIRRORROW_AVX2 345651398e0be5004b8818df4a4ccda9fe77bcfaf141fbarchard@google.com 3457f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com#ifdef HAS_SPLITUVROW_SSE2 3458d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 3459f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { 34602d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com __asm { 34612d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com push edi 34622d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com mov eax, [esp + 4 + 4] // src_uv 34632d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 34642d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 34652d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com mov ecx, [esp + 4 + 16] // pix 34662d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 34672d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com psrlw xmm5, 8 34682d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com sub edi, edx 34692d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 3470c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 34712d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com convertloop: 34722d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa xmm0, [eax] 34732d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa xmm1, [eax + 16] 34742d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com lea eax, [eax + 32] 34752d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa xmm2, xmm0 34762d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 
movdqa xmm3, xmm1 34772d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com pand xmm0, xmm5 // even bytes 34782d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com pand xmm1, xmm5 34792d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com packuswb xmm0, xmm1 34802d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com psrlw xmm2, 8 // odd bytes 34812d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com psrlw xmm3, 8 34822d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com packuswb xmm2, xmm3 34832d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa [edx], xmm0 34842d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com movdqa [edx + edi], xmm2 34852d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com lea edx, [edx + 16] 34862d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com sub ecx, 16 348718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 348818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com 34892d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com pop edi 34902d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com ret 34912d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com } 34922d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com} 3493db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com 3494db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com__declspec(naked) __declspec(align(16)) 3495f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 3496f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int pix) { 3497db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com __asm { 3498db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com push edi 3499db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com mov eax, [esp + 4 + 4] // src_uv 3500db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com mov edx, 
[esp + 4 + 8] // dst_u 3501db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 3502db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 3503db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 3504db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com psrlw xmm5, 8 3505db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com sub edi, edx 3506db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com 3507c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3508db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com convertloop: 3509db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqu xmm0, [eax] 3510db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqu xmm1, [eax + 16] 3511db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com lea eax, [eax + 32] 3512db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqa xmm2, xmm0 3513db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqa xmm3, xmm1 3514db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com pand xmm0, xmm5 // even bytes 3515db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com pand xmm1, xmm5 3516db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com packuswb xmm0, xmm1 3517db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com psrlw xmm2, 8 // odd bytes 3518db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com psrlw xmm3, 8 3519db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com packuswb xmm2, xmm3 3520db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqu [edx], xmm0 3521db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com movdqu [edx + edi], xmm2 3522db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com lea edx, [edx + 16] 3523db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com sub ecx, 16 
3524db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com jg convertloop 3525db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com 3526db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com pop edi 3527db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com ret 3528db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com } 3529db694edfc2dcdede9adad7febc4e4b7f9506eee8fbarchard@google.com} 3530f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com#endif // HAS_SPLITUVROW_SSE2 35312d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 3532c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com#ifdef HAS_SPLITUVROW_AVX2 3533c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com__declspec(naked) __declspec(align(16)) 3534c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.comvoid SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { 3535c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com __asm { 3536c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com push edi 3537c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com mov eax, [esp + 4 + 4] // src_uv 3538c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 3539c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 3540c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com mov ecx, [esp + 4 + 16] // pix 3541c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 3542c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpsrlw ymm5, ymm5, 8 3543c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com sub edi, edx 3544c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 3545c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3546c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com convertloop: 
3547b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 3548b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 3549c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com lea eax, [eax + 64] 3550c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpsrlw ymm2, ymm0, 8 // odd bytes 3551c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpsrlw ymm3, ymm1, 8 3552c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpand ymm0, ymm0, ymm5 // even bytes 3553c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpand ymm1, ymm1, ymm5 3554c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpackuswb ymm0, ymm0, ymm1 3555c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpackuswb ymm2, ymm2, ymm3 3556c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpermq ymm0, ymm0, 0xd8 3557c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com vpermq ymm2, ymm2, 0xd8 3558b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edx], ymm0 3559b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edx + edi], ymm2 3560c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com lea edx, [edx + 32] 3561c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com sub ecx, 32 3562c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com jg convertloop 3563c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 3564c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com pop edi 35659b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 3566c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com ret 3567c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com } 3568c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com} 3569b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#endif // HAS_SPLITUVROW_AVX2 3570c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 
3571b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#ifdef HAS_MERGEUVROW_SSE2 3572c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com__declspec(naked) __declspec(align(16)) 3573b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 3574b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com int width) { 3575c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com __asm { 3576c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com push edi 3577b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4 + 4] // src_u 3578b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 4 + 8] // src_v 3579b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 4 + 12] // dst_uv 3580b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 3581b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edx, eax 3582c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 3583c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3584c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com convertloop: 3585b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com movdqa xmm0, [eax] // read 16 U's 3586b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com movdqa xmm1, [eax + edx] // and 16 V's 3587b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 16] 3588b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com movdqa xmm2, xmm0 3589b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com punpcklbw xmm0, xmm1 // first 8 UV pairs 3590b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com punpckhbw xmm2, xmm1 // next 8 UV pairs 3591b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com movdqa [edi], xmm0 3592b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com movdqa [edi + 16], xmm2 
3593b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edi, [edi + 32] 3594b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 16 3595c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com jg convertloop 3596c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 3597c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com pop edi 3598c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com ret 3599c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com } 3600c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com} 3601c9562334d7ee6b7ecda388001f47864d5fd6ca0afbarchard@google.com 3602e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 3603f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, 3604f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com uint8* dst_uv, int width) { 3605e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com __asm { 3606e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com push edi 3607e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com mov eax, [esp + 4 + 4] // src_u 3608e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com mov edx, [esp + 4 + 8] // src_v 3609e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com mov edi, [esp + 4 + 12] // dst_uv 3610e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com mov ecx, [esp + 4 + 16] // width 3611e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com sub edx, eax 3612e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com 3613c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3614e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com convertloop: 3615e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com movdqu xmm0, [eax] // read 16 U's 3616e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com movdqu xmm1, [eax + edx] 
// and 16 V's 3617e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com lea eax, [eax + 16] 3618e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com movdqa xmm2, xmm0 3619e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com punpcklbw xmm0, xmm1 // first 8 UV pairs 3620e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com punpckhbw xmm2, xmm1 // next 8 UV pairs 3621e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com movdqu [edi], xmm0 3622e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com movdqu [edi + 16], xmm2 3623e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com lea edi, [edi + 32] 3624e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com sub ecx, 16 3625e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com jg convertloop 3626e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com 3627e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com pop edi 3628e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com ret 3629e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com } 3630e0d8648b6ab861cfcf03513439fad8ae39ba50c2fbarchard@google.com} 3631f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com#endif // HAS_MERGEUVROW_SSE2 36321dafd444ba355e8188cc42c61d3ad85d6681fd1dfbarchard@google.com 3633b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#ifdef HAS_MERGEUVROW_AVX2 3634b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3635b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 3636b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com int width) { 3637b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 3638b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push edi 3639b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4 + 4] // src_u 
3640b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 4 + 8] // src_v 3641b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 4 + 12] // dst_uv 3642b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 4 + 16] // width 3643b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edx, eax 3644b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3645c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3646b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 3647b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] // read 32 U's 3648b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + edx] // and 32 V's 3649b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 32] 3650b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 3651b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. 
mutated qqword 1,3 3652b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vperm2i128 ymm1, ymm2, ymm0, 0x20 // low 128 of ymm2 and low 128 of ymm0 3653b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vperm2i128 ymm2, ymm2, ymm0, 0x31 // high 128 of ymm2 and high 128 of ymm0 3654b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edi], ymm1 3655b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edi + 32], ymm2 3656b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edi, [edi + 64] 3657b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 3658b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 3659b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3660b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop edi 36619b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 3662b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 3663b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 3664b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 3665b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#endif // HAS_MERGEUVROW_AVX2 3666b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 366719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#ifdef HAS_COPYROW_SSE2 3668c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. 
3669d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 367019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_SSE2(const uint8* src, uint8* dst, int count) { 367119932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com __asm { 367219932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov eax, [esp + 4] // src 367319932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov edx, [esp + 8] // dst 367419932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov ecx, [esp + 12] // count 3675ba3aeed3b86dfae7bc0631c8bed9b50303318dcafbarchard@google.com 3676c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 367719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com convertloop: 367819932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com movdqa xmm0, [eax] 367919932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com movdqa xmm1, [eax + 16] 368019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com lea eax, [eax + 32] 3681c140b9d150bb40ff79a2a53ad560494b67fb115ffbarchard@google.com movdqa [edx], xmm0 3682c140b9d150bb40ff79a2a53ad560494b67fb115ffbarchard@google.com movdqa [edx + 16], xmm1 3683c140b9d150bb40ff79a2a53ad560494b67fb115ffbarchard@google.com lea edx, [edx + 32] 368419932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com sub ecx, 32 368518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 368619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com ret 368719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com } 368819932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com} 368919932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#endif // HAS_COPYROW_SSE2 369019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com 3691b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com// Unaligned Multiple of 1. 
3692b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3693aa7988ff733b13d7bfd3c755bf0c18f93b9e8f6efbarchard@google.comvoid CopyRow_ERMS(const uint8* src, uint8* dst, int count) { 3694b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 3695b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, esi 3696b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, edi 3697b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov esi, [esp + 4] // src 3698b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 8] // dst 3699b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 12] // count 3700b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com rep movsb 3701b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, edx 3702b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov esi, eax 3703b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 3704b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 3705b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 3706b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 370719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#ifdef HAS_COPYROW_X86 3708d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 370919932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_X86(const uint8* src, uint8* dst, int count) { 371019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com __asm { 371119932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov eax, esi 371219932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov edx, edi 371319932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov esi, [esp + 4] // src 371419932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov edi, [esp + 8] // dst 
371519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov ecx, [esp + 12] // count 371619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com shr ecx, 2 371719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com rep movsd 371819932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov edi, edx 371919932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com mov esi, eax 372019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com ret 372119932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com } 372219932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com} 37234c416e8849dcda3e884c6204bcdaa264f66d4288fbarchard@google.com#endif // HAS_COPYROW_X86 372419932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com 37257f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com#ifdef HAS_ARGBCOPYALPHAROW_SSE2 37267f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com// width in pixels 37277f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com__declspec(naked) __declspec(align(16)) 37287f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.comvoid ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { 37297f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com __asm { 37307f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com mov eax, [esp + 4] // src 3731f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com mov edx, [esp + 8] // dst 37327f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com mov ecx, [esp + 12] // count 3733f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pcmpeqb xmm0, xmm0 // generate mask 0xff000000 3734f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pslld xmm0, 24 3735f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff 3736f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com psrld xmm1, 8 37377f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com 
3738f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com align 4 37397f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com convertloop: 3740f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa xmm2, [eax] 3741f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa xmm3, [eax + 16] 37427f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com lea eax, [eax + 32] 3743f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa xmm4, [edx] 3744f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa xmm5, [edx + 16] 3745f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pand xmm2, xmm0 3746f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pand xmm3, xmm0 3747f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pand xmm4, xmm1 3748f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com pand xmm5, xmm1 3749f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com por xmm2, xmm4 3750f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com por xmm3, xmm5 3751f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa [edx], xmm2 3752f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com movdqa [edx + 16], xmm3 3753f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com lea edx, [edx + 32] 37547f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com sub ecx, 8 37557f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com jg convertloop 37567f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com 37577f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com ret 37587f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com } 37597f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com} 37607f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com#endif // HAS_ARGBCOPYALPHAROW_SSE2 37617f67961ec53f0ad12f827905fc4a4cc880f00931fbarchard@google.com 3762f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com#ifdef 
HAS_ARGBCOPYALPHAROW_AVX2 3763f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com// width in pixels 3764f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3765f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.comvoid ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { 3766f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com __asm { 3767f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com mov eax, [esp + 4] // src 3768f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com mov edx, [esp + 8] // dst 3769f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com mov ecx, [esp + 12] // count 37703075de82856a044ebd3e808b2f0918d2b0e9713cfbarchard@google.com vpcmpeqb ymm0, ymm0, ymm0 3771adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff 3772f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com 3773f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com align 4 3774f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com convertloop: 3775adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu ymm1, [eax] 3776adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu ymm2, [eax + 32] 3777f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com lea eax, [eax + 64] 3778adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpblendvb ymm1, ymm1, [edx], ymm0 3779adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpblendvb ymm2, ymm2, [edx + 32], ymm0 3780adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu [edx], ymm1 3781adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu [edx + 32], ymm2 3782f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com lea edx, [edx + 64] 3783f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com sub ecx, 16 3784f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com jg convertloop 
3785f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com 3786f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com vzeroupper 3787f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com ret 3788f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com } 3789f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com} 3790f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com#endif // HAS_ARGBCOPYALPHAROW_AVX2 3791f6631bb814600f841f74a9d8a626b528be2fd8bbfbarchard@google.com 3792adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 3793adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com// width in pixels 3794adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com__declspec(naked) __declspec(align(16)) 3795adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.comvoid ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { 3796adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com __asm { 3797adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov eax, [esp + 4] // src 3798adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov edx, [esp + 8] // dst 3799adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov ecx, [esp + 12] // count 3800adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pcmpeqb xmm0, xmm0 // generate mask 0xff000000 3801adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pslld xmm0, 24 3802adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff 3803adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com psrld xmm1, 8 3804adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 3805adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com align 4 3806adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com convertloop: 3807adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com movq xmm2, qword ptr [eax] // 8 Y's 
3808adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com lea eax, [eax + 8] 3809adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com punpcklbw xmm2, xmm2 3810adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com punpckhwd xmm3, xmm2 3811adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com punpcklwd xmm2, xmm2 3812adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com movdqa xmm4, [edx] 3813adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com movdqa xmm5, [edx + 16] 3814adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pand xmm2, xmm0 3815adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pand xmm3, xmm0 3816adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pand xmm4, xmm1 3817adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com pand xmm5, xmm1 3818adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com por xmm2, xmm4 3819adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com por xmm3, xmm5 3820adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com movdqa [edx], xmm2 3821adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com movdqa [edx + 16], xmm3 3822adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com lea edx, [edx + 32] 3823adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com sub ecx, 8 3824adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com jg convertloop 3825adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 3826adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com ret 3827adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com } 3828adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com} 3829adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 3830adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 3831adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 
3832adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com// width in pixels 3833adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com__declspec(naked) __declspec(align(16)) 3834adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.comvoid ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { 3835adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com __asm { 3836adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov eax, [esp + 4] // src 3837adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov edx, [esp + 8] // dst 3838adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com mov ecx, [esp + 12] // count 3839adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpcmpeqb ymm0, ymm0, ymm0 3840adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff 3841adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 3842adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com align 4 3843adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com convertloop: 3844adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpmovzxbd ymm1, qword ptr [eax] 3845adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpmovzxbd ymm2, qword ptr [eax + 8] 3846adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com lea eax, [eax + 16] 3847adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpslld ymm1, ymm1, 24 3848adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpslld ymm2, ymm2, 24 3849adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpblendvb ymm1, ymm1, [edx], ymm0 3850adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vpblendvb ymm2, ymm2, [edx + 32], ymm0 3851adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu [edx], ymm1 3852adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vmovdqu [edx + 32], ymm2 3853adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com lea 
edx, [edx + 64] 3854adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com sub ecx, 16 3855adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com jg convertloop 3856adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 3857adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com vzeroupper 3858adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com ret 3859adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com } 3860adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com} 3861adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 3862adef267edfb3539cd773692d6fa4050ffd092f55fbarchard@google.com 386364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef HAS_SETROW_X86 386464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow8 writes 'count' bytes using a 32 bit value repeated. 386564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com__declspec(naked) __declspec(align(16)) 3866f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SetRow_X86(uint8* dst, uint32 v32, int count) { 386764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com __asm { 386864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov edx, edi 386964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov edi, [esp + 4] // dst 387064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov eax, [esp + 8] // v32 387164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov ecx, [esp + 12] // count 387264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com shr ecx, 2 387364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com rep stosd 387464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov edi, edx 387564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ret 387664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com } 387764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 
387864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 387964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow32 writes 'count' words using a 32 bit value repeated. 388064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com__declspec(naked) __declspec(align(16)) 3881f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid ARGBSetRows_X86(uint8* dst, uint32 v32, int width, 388264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com int dst_stride, int height) { 388364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com __asm { 388464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com push esi 388564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com push edi 388664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com push ebp 388764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov edi, [esp + 12 + 4] // dst 388864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov eax, [esp + 12 + 8] // v32 388964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov ebp, [esp + 12 + 12] // width 389064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov edx, [esp + 12 + 16] // dst_stride 389164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov esi, [esp + 12 + 20] // height 389264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com lea ecx, [ebp * 4] 389364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com sub edx, ecx // stride - width * 4 389464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 3895c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 389664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com convertloop: 389764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com mov ecx, ebp 389864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com rep stosd 389964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com add edi, edx 390064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 
sub esi, 1 390164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com jg convertloop 390264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 390364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com pop ebp 390464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com pop edi 390564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com pop esi 390664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ret 390764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com } 390864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 390964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif // HAS_SETROW_X86 391064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 3911b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#ifdef HAS_YUY2TOYROW_AVX2 3912b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3913b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid YUY2ToYRow_AVX2(const uint8* src_yuy2, 3914b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_y, int pix) { 3915b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 3916b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4] // src_yuy2 3917b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 8] // dst_y 3918b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 12] // pix 3919b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 3920b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm5, ymm5, 8 3921b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3922c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3923b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 3924b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 
3925b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 3926b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 3927b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm0, ymm0, ymm5 // even bytes are Y 3928b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm1, ymm5 3929b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 3930b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 3931b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 3932b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edx], ymm0 3933b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 32] 3934b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 39359b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 3936b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 3937b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 3938b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 3939b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3940b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3941b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, 3942b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 3943b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 3944b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push esi 3945b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push edi 3946b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 8 + 4] // src_yuy2 3947b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov esi, [esp + 8 + 8] // stride_yuy2 
3948b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 3949b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 3950b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 3951b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 3952b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm5, ymm5, 8 3953b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edi, edx 3954b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3955c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3956b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 3957b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 3958b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 3959b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpavgb ymm0, ymm0, [eax + esi] 3960b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpavgb ymm1, ymm1, [eax + esi + 32] 3961b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 3962b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV 3963b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm1, ymm1, 8 3964b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 3965b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 3966b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm0, ymm5 // U 3967b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // V 3968b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm1, ymm1, ymm1 // mutates. 
3969b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm0 // mutates. 3970b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm1, ymm1, 0xd8 3971b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 3972b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx], ymm1, 0 // U 3973b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx + edi], ymm0, 0 // V 3974b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 16] 3975b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 3976b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 3977b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3978b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop edi 3979b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop esi 39809b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 3981b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 3982b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 3983b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 3984b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3985b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 3986b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid YUY2ToUV422Row_AVX2(const uint8* src_yuy2, 3987b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 3988b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 3989b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push edi 3990b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4 + 4] // src_yuy2 3991b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 
3992b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 3993b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 4 + 16] // pix 3994b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 3995b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm5, ymm5, 8 3996b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edi, edx 3997b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 3998c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 3999b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 4000b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 4001b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 4002b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 4003b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV 4004b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm1, ymm1, 8 4005b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 4006b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4007b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm0, ymm5 // U 4008b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // V 4009b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm1, ymm1, ymm1 // mutates. 4010b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm0 // mutates. 
4011b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm1, ymm1, 0xd8 4012b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4013b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx], ymm1, 0 // U 4014b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx + edi], ymm0, 0 // V 4015b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 16] 4016b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 4017b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 4018b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4019b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop edi 40209b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 4021b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 4022b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 4023b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 4024b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4025b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 4026b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid UYVYToYRow_AVX2(const uint8* src_uyvy, 4027b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_y, int pix) { 4028b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 4029b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4] // src_uyvy 4030b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 8] // dst_y 4031b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 12] // pix 4032b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4033c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4034b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 
4035b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 4036b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 4037b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 4038b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // odd bytes are Y 4039b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm1, ymm1, 8 4040b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 4041b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4042b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 4043b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu [edx], ymm0 4044b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 32] 4045b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 4046b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 40479b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 4048b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 4049b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 4050b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4051b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 4052b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, 4053b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4054b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 4055b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push esi 4056b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push edi 4057b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 8 + 4] // src_yuy2 
4058b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov esi, [esp + 8 + 8] // stride_yuy2 4059b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 4060b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 4061b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 4062b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 4063b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm5, ymm5, 8 4064b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edi, edx 4065b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4066c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4067b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 4068b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 4069b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 4070b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpavgb ymm0, ymm0, [eax + esi] 4071b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpavgb ymm1, ymm1, [eax + esi + 32] 4072b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 4073b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm0, ymm0, ymm5 // UYVY -> UVUV 4074b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm1, ymm5 4075b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 
4076b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4077b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm0, ymm5 // U 4078b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // V 4079b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm1, ymm1, ymm1 // mutates. 4080b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm0 // mutates. 4081b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm1, ymm1, 0xd8 4082b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4083b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx], ymm1, 0 // U 4084b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx + edi], ymm0, 0 // V 4085b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 16] 4086b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 4087b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 4088b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4089b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop edi 4090b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop esi 40919b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 4092b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 4093b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 4094b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 4095b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4096b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com__declspec(naked) __declspec(align(16)) 4097b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.comvoid UYVYToUV422Row_AVX2(const uint8* src_uyvy, 4098b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 
4099b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com __asm { 4100b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com push edi 4101b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov eax, [esp + 4 + 4] // src_yuy2 4102b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 4103b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 4104b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com mov ecx, [esp + 4 + 16] // pix 4105b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 4106b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm5, ymm5, 8 4107b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub edi, edx 4108b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4109c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4110b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com convertloop: 4111b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm0, [eax] 4112b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vmovdqu ymm1, [eax + 32] 4113b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea eax, [eax + 64] 4114b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm0, ymm0, ymm5 // UYVY -> UVUV 4115b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm1, ymm5 4116b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // mutates. 4117b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4118b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpand ymm1, ymm0, ymm5 // U 4119b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpsrlw ymm0, ymm0, 8 // V 4120b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm1, ymm1, ymm1 // mutates. 
4121b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpackuswb ymm0, ymm0, ymm0 // mutates. 4122b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm1, ymm1, 0xd8 4123b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vpermq ymm0, ymm0, 0xd8 4124b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx], ymm1, 0 // U 4125b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com vextractf128 [edx + edi], ymm0, 0 // V 4126b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com lea edx, [edx + 16] 4127b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com sub ecx, 32 4128b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com jg convertloop 4129b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com 4130b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com pop edi 41319b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 4132b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com ret 4133b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com } 4134b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com} 4135b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com#endif // HAS_YUY2TOYROW_AVX2 4136b444bae883e97c1e4579f2e1148cf14f9c7c18fbfbarchard@google.com// SSE2 converters from packed YUY2/UYVY rows to planar Y, U and V. Every loop pass reads 32 source bytes (16 pixels) and the loop body runs once before pix is tested. YUY2ToYRow_SSE2 keeps the even (Y) bytes; its movdqa loads/stores require 16-byte aligned src_yuy2 and dst_y. 4137b95dbf24951d8b7118f680d75c7456a5f5d57bfffbarchard@google.com#ifdef HAS_YUY2TOYROW_SSE2 4138d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4139e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid YUY2ToYRow_SSE2(const uint8* src_yuy2, 4140e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com uint8* dst_y, int pix) { 4141e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4142e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 4] // src_yuy2 4143e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8] // dst_y 
4144e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 12] // pix 4145e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4146e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm5, 8 4147e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4148c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4149e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4150e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm0, [eax] 4151e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, [eax + 16] 4152e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4153e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // even bytes are Y 4154e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm1, xmm5 4155e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 415618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 // sets the flags tested by jg below; the movdqa and lea in between do not modify flags 4157e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa [edx], xmm0 4158e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 16] 415918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4160e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4161e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4162e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4163e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com// YUY2ToUVRow_SSE2: averages the row at src_yuy2 with the row at src_yuy2 + stride_yuy2 (pavgb), then writes 8 U bytes and 8 V bytes per 16 pixels. Source loads are movdqa, so both rows must be 16-byte aligned. 4164d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4165e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, 4166c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 
4167e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4168e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push esi 4169e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push edi 4170e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 8 + 4] // src_yuy2 4171e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov esi, [esp + 8 + 8] // stride_yuy2 4172e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 4173e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 4174e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 4175e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4176e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm5, 8 4177e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4178e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4179c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4180e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4181e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm0, [eax] 4182e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, [eax + 16] 4183e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm2, [eax + esi] 4184e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm3, [eax + esi + 16] 4185e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4186e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm0, xmm2 4187e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm1, xmm3 4188e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm0, 8 // YUYV -> UVUV 4189e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 
4190e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 4191e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, xmm0 4192e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // U 4193e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm0 4194e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 // V 4195e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm1, xmm1 4196e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx], xmm0 4197e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx + edi], xmm1 4198e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 8] 4199e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub ecx, 16 420018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4201e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4202e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop edi 4203e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop esi 4204e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4205e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4206e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4207e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com// YUY2ToUV422Row_SSE2: same U/V split as YUY2ToUVRow_SSE2 but reads a single row, with no vertical averaging. 4208d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4209c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.comvoid YUY2ToUV422Row_SSE2(const uint8* src_yuy2, 4210c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4211c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com __asm { 4212c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com push edi 4213c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov eax, [esp + 4 + 4] // src_yuy2 
4214c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 4215c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 4216c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 4217c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4218c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm5, 8 4219c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4220c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4221c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4222c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com convertloop: 4223c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm0, [eax] 4224c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, [eax + 16] 4225c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea eax, [eax + 32] 4226c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm0, 8 // YUYV -> UVUV 4227c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 4228c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm1 4229c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, xmm0 4230c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // U 4231c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm0 4232c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 // V 4233c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm1, xmm1 4234c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx], xmm0 4235c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx + edi], xmm1 4236c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea edx, [edx + 8] 
4237c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub ecx, 16 4238c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com jg convertloop 4239c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4240c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pop edi 4241c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com ret 4242c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com } 4243c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com} 4244c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4245c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com__declspec(naked) __declspec(align(16)) 4246e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, 4247e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com uint8* dst_y, int pix) { 4248e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4249e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 4] // src_yuy2 (this function is the unaligned variant of YUY2ToYRow_SSE2: movdqu loads and stores, no 16-byte alignment needed) 4250e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8] // dst_y 4251e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 12] // pix 4252e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4253e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm5, 8 4254e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4255c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4256e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4257e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm0, [eax] 4258e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm1, [eax + 16] 4259e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4260e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // even bytes are Y 
4261e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm1, xmm5 4262e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 426318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 4264e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu [edx], xmm0 4265e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 16] 426618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4267e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4268e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4269e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4270e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4271d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4272e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, 4273c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4274e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4275e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push esi 4276e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push edi 4277e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 8 + 4] // src_yuy2 (this function is the unaligned variant of YUY2ToUVRow_SSE2: both source rows are loaded with movdqu) 4278e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov esi, [esp + 8 + 8] // stride_yuy2 4279e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 4280e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 4281e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 4282e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4283e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 
psrlw xmm5, 8 4284e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub edi, edx 4285e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4286c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4287e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4288e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm0, [eax] 4289e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm1, [eax + 16] 4290e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm2, [eax + esi] 4291e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm3, [eax + esi + 16] 4292e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4293e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm0, xmm2 4294e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm1, xmm3 4295e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm0, 8 // YUYV -> UVUV 4296e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 4297e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 4298e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, xmm0 4299e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // U 4300e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm0 4301e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 // V 4302e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm1, xmm1 4303e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx], xmm0 4304e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx + edi], xmm1 4305e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 8] 4306e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub ecx, 16 
430718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4308e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4309e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop edi 4310e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop esi 4311e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4312e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4313e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4314e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4315d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4316c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.comvoid YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, 4317c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4318c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com __asm { 4319c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com push edi 4320c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov eax, [esp + 4 + 4] // src_yuy2 (this function is the unaligned variant of YUY2ToUV422Row_SSE2: the source row is loaded with movdqu) 4321c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 4322c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 4323c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 4324c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4325c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm5, 8 4326c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4327c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4328c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4329c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com convertloop: 4330c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqu xmm0, [eax] 
4331c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqu xmm1, [eax + 16] 4332c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea eax, [eax + 32] 4333c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm0, 8 // YUYV -> UVUV 4334c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 4335c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm1 4336c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, xmm0 4337c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // U 4338c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm0 4339c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 // V 4340c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm1, xmm1 4341c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx], xmm0 4342c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx + edi], xmm1 4343c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea edx, [edx + 8] 4344c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub ecx, 16 4345c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com jg convertloop 4346c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4347c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pop edi 4348c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com ret 4349c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com } 4350c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com} 4351c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com// UYVYToYRow_SSE2: UYVY keeps Y in the odd bytes, so each 16-bit lane is shifted right by 8 before packing to 16 output bytes. movdqa requires 16-byte aligned src_uyvy and dst_y. 4352c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com__declspec(naked) __declspec(align(16)) 4353e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid UYVYToYRow_SSE2(const uint8* src_uyvy, 4354e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com uint8* dst_y, int pix) { 
4355e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4356e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 4] // src_uyvy 4357e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8] // dst_y 4358e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 12] // pix 4359e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4360c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4361e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4362e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm0, [eax] 4363e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, [eax + 16] 4364e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4365e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm0, 8 // odd bytes are Y 4366e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 4367e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 436818184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 4369e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa [edx], xmm0 4370e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 16] 437118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4372e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4373e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4374e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4375e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com// UYVYToUVRow_SSE2: averages the row at src_uyvy with the row at src_uyvy + stride_uyvy (pavgb), keeps the even (U/V) bytes, then writes 8 U bytes and 8 V bytes per 16 pixels. Source loads are movdqa (16-byte aligned). 4376d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4377e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, 4378c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* 
dst_v, int pix) { 4379e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4380e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push esi 4381e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push edi 4382e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 8 + 4] // src_uyvy 4383e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov esi, [esp + 8 + 8] // stride_uyvy 4384e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 4385e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 4386e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 4387e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4388e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm5, 8 4389e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4390e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4391c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4392e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4393e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm0, [eax] 4394e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, [eax + 16] 4395e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm2, [eax + esi] 4396e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm3, [eax + esi + 16] 4397e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4398e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm0, xmm2 4399e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm1, xmm3 4400e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // UYVY -> UVUV 4401e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm1, 
xmm5 4402e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 4403e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, xmm0 4404e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // U 4405e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm0 4406e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 // V 4407e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm1, xmm1 4408e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx], xmm0 4409e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx + edi], xmm1 4410e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 8] 4411e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub ecx, 16 441218184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4413e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4414e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop edi 4415e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop esi 4416e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4417e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4418e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4419e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com// UYVYToUV422Row_SSE2: single-row U/V extraction from UYVY (even bytes), with no vertical averaging. 4420d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4421c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.comvoid UYVYToUV422Row_SSE2(const uint8* src_uyvy, 4422c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4423c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com __asm { 4424c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com push edi 4425c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov eax, [esp + 4 + 4] // src_uyvy 
4426c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 4427c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 4428c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 4429c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4430c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm5, 8 4431c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4432c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4433c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4434c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com convertloop: 4435c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm0, [eax] 4436c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, [eax + 16] 4437c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea eax, [eax + 32] 4438c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // UYVY -> UVUV 4439c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm1, xmm5 4440c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm1 4441c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, xmm0 4442c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // U 4443c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm0 4444c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 // V 4445c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm1, xmm1 4446c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx], xmm0 4447c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx + edi], xmm1 4448c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea edx, [edx + 8] 
4449c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub ecx, 16 4450c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com jg convertloop 4451c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4452c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pop edi 4453c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com ret 4454c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com } 4455c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com} 4456c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4457c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com__declspec(naked) __declspec(align(16)) 4458e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, 4459e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com uint8* dst_y, int pix) { 4460e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4461e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 4] // src_uyvy (this function is the unaligned variant of UYVYToYRow_SSE2: movdqu loads and stores, no 16-byte alignment needed) 4462e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8] // dst_y 4463e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 12] // pix 4464e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4465c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4466e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4467e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm0, [eax] 4468e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm1, [eax + 16] 4469e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4470e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm0, 8 // odd bytes are Y 4471e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 4472e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 
447318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com sub ecx, 16 4474e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu [edx], xmm0 4475e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 16] 447618184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4477e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4478e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4479e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4480e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4481d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com__declspec(naked) __declspec(align(16)) 4482e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.comvoid UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, 4483c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4484e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com __asm { 4485e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push esi 4486e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com push edi 4487e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov eax, [esp + 8 + 4] // src_uyvy (this function is the unaligned variant of UYVYToUVRow_SSE2: both source rows are loaded with movdqu) 4488e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov esi, [esp + 8 + 8] // stride_uyvy 4489e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edx, [esp + 8 + 12] // dst_u 4490e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov edi, [esp + 8 + 16] // dst_v 4491e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com mov ecx, [esp + 8 + 20] // pix 4492e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4493e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm5, 8 4494e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4495e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 
4496c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4497e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com convertloop: 4498e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm0, [eax] 4499e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm1, [eax + 16] 4500e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm2, [eax + esi] 4501e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqu xmm3, [eax + esi + 16] 4502e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea eax, [eax + 32] 4503e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm0, xmm2 4504e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pavgb xmm1, xmm3 4505e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // UYVY -> UVUV 4506e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm1, xmm5 4507e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm1 4508e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movdqa xmm1, xmm0 4509e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pand xmm0, xmm5 // U 4510e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm0, xmm0 4511e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com psrlw xmm1, 8 // V 4512e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com packuswb xmm1, xmm1 4513e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx], xmm0 4514e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com movq qword ptr [edx + edi], xmm1 4515e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com lea edx, [edx + 8] 4516e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com sub ecx, 16 451718184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com jg convertloop 4518e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 
4519e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop edi 4520e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com pop esi 4521e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com ret 4522e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com } 4523e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com} 4524c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4525c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com__declspec(naked) __declspec(align(16)) 4526c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.comvoid UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, 4527c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 4528c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com __asm { 4529c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com push edi 4530c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov eax, [esp + 4 + 4] // src_uyvy (this function is the unaligned variant of UYVYToUV422Row_SSE2: the source row is loaded with movdqu) 4531c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edx, [esp + 4 + 8] // dst_u 4532c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 4533c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 4534c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff 4535c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm5, 8 4536c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v 4537c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4538c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4539c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com convertloop: 4540c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqu xmm0, [eax] 4541c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqu xmm1, [eax + 16] 
4542c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea eax, [eax + 32] 4543c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // UYVY -> UVUV 4544c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm1, xmm5 4545c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm1 4546c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movdqa xmm1, xmm0 4547c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pand xmm0, xmm5 // U 4548c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm0, xmm0 4549c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com psrlw xmm1, 8 // V 4550c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com packuswb xmm1, xmm1 4551c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx], xmm0 4552c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com movq qword ptr [edx + edi], xmm1 4553c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com lea edx, [edx + 8] 4554c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com sub ecx, 16 4555c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com jg convertloop 4556c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com 4557c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com pop edi 4558c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com ret 4559c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com } 4560c704f789e9305890d865e6334f57a9febbc83e45fbarchard@google.com} 4561b95dbf24951d8b7118f680d75c7456a5f5d57bfffbarchard@google.com#endif // HAS_YUY2TOYROW_SSE2 4562e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com 4563965fb914ea3f5057cd186763c9af5d3110c44acdfbarchard@google.com#ifdef HAS_ARGBBLENDROW_SSE2 45641702ec78f85cc484e10eeac501971f76ab173b83fbarchard@google.com// Blend 8 pixels at a time. 
// Per pixel: dst = src_argb0 + ((src_argb1 * (256 - alpha0)) >> 8) on every
// channel using a saturating byte add, where alpha0 is the alpha byte of
// src_argb0. The alpha of src_argb0 is forced to 255 before the add, so the
// output alpha is always 255. src_argb0 is added unscaled, i.e. it is
// presumably expected to be premultiplied by its alpha already.
//
// Structure:
//   alignloop1   - 1 pixel per pass until dst_argb is 16-byte aligned.
//   convertloop4 - 4 pixels per pass; unaligned source loads, aligned store.
//   convertloop1 - 1 pixel per pass for the remainder.
// ecx holds (remaining - 1) in the 1 pixel loops and (remaining - 4) in the
// 4 pixel loop, so "jge" after the decrement means "another pass is needed".
// A width of 0 or less returns without reading or writing any pixels.
__declspec(naked) __declspec(align(16))
void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
  __asm {
    push       esi
    // Arguments sit at esp + 4 (return address) + 4 (esi pushed above).
    mov        eax, [esp + 4 + 4]   // src_argb0
    mov        esi, [esp + 4 + 8]   // src_argb1
    mov        edx, [esp + 4 + 12]  // dst_argb
    mov        ecx, [esp + 4 + 16]  // width
    pcmpeqb    xmm7, xmm7       // generate constant 1
    psrlw      xmm7, 15
    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
    psrlw      xmm6, 8
    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
    psllw      xmm5, 8
    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
    pslld      xmm4, 24

    sub        ecx, 1           // ecx = width - 1
    je         convertloop1     // only 1 pixel?
    jl         convertloop1b    // width <= 0: nothing to do

    // 1 pixel loop until destination pointer is aligned.
    // If dst_argb is not 4-byte aligned it never becomes 16-byte aligned and
    // the whole row is handled here.
  alignloop1:
    test       edx, 15          // aligned?
    je         alignloop1b
    movd       xmm3, [eax]
    lea        eax, [eax + 4]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movd       xmm2, [esi]      // _r_b
    psrlw      xmm3, 8          // alpha
    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
    pshuflw    xmm3, xmm3, 0F5h
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movd       xmm1, [esi]      // _a_g
    lea        esi, [esi + 4]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 1
    movd       [edx], xmm0
    lea        edx, [edx + 4]
    jge        alignloop1

  alignloop1b:
    add        ecx, 1 - 4       // rebias counter: remaining - 4
    jl         convertloop4b    // fewer than 4 pixels left

    // 4 pixel loop.
  convertloop4:
    movdqu     xmm3, [eax]      // src argb
    lea        eax, [eax + 16]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movdqu     xmm2, [esi]      // _r_b
    psrlw      xmm3, 8          // alpha
    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
    pshuflw    xmm3, xmm3, 0F5h
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movdqu     xmm1, [esi]      // _a_g
    lea        esi, [esi + 16]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 4
    movdqa     [edx], xmm0      // dst is 16-byte aligned here
    lea        edx, [edx + 16]
    jge        convertloop4

  convertloop4b:
    add        ecx, 4 - 1       // rebias counter: remaining - 1
    jl         convertloop1b    // no pixels left

    // 1 pixel loop.
  convertloop1:
    movd       xmm3, [eax]      // src argb
    lea        eax, [eax + 4]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movd       xmm2, [esi]      // _r_b
    psrlw      xmm3, 8          // alpha
    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
    pshuflw    xmm3, xmm3, 0F5h
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movd       xmm1, [esi]      // _a_g
    lea        esi, [esi + 4]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 1
    movd       [edx], xmm0
    lea        edx, [edx + 4]
    jge        convertloop1

  convertloop1b:
    pop        esi
    ret
  }
}
#endif  // HAS_ARGBBLENDROW_SSE2

#ifdef HAS_ARGBBLENDROW_SSSE3
// Shuffle table for isolating alpha.
4687851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const uvec8 kShuffleAlpha = { 4688d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 4689d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 4690d2f4413d29d15b94d971630ba555dd0cd8fcc8c2fbarchard@google.com}; 4691c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Same as SSE2, but replaces: 4692c757f308eab211f9d5467a089052e7d84606f6c1fbarchard@google.com// psrlw xmm3, 8 // alpha 469398a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com// pshufhw xmm3, xmm3, 0F5h // 8 alpha words 469498a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com// pshuflw xmm3, xmm3, 0F5h 4695c757f308eab211f9d5467a089052e7d84606f6c1fbarchard@google.com// with.. 4696c757f308eab211f9d5467a089052e7d84606f6c1fbarchard@google.com// pshufb xmm3, kShuffleAlpha // alpha 4697c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Blend 8 pixels at a time. 
// Per pixel: dst = src_argb0 + ((src_argb1 * (256 - alpha0)) >> 8) on every
// channel using a saturating byte add, where alpha0 is the alpha byte of
// src_argb0. The alpha of src_argb0 is forced to 255 before the add, so the
// output alpha is always 255. src_argb0 is added unscaled, i.e. it is
// presumably expected to be premultiplied by its alpha already.
//
// Structure:
//   alignloop1    - 1 pixel per pass until dst_argb is 16-byte aligned.
//   convertloop4  - 4 pixels per pass, used when both sources are 16-byte
//                   aligned at that point (movdqa loads).
//   convertuloop4 - 4 pixels per pass with movdqu loads otherwise.
//   convertloop1  - 1 pixel per pass for the remainder.
// ecx holds (remaining - 1) in the 1 pixel loops and (remaining - 4) in the
// 4 pixel loops, so "jge" after the decrement means "another pass is needed".
// A width of 0 or less returns without reading or writing any pixels.
__declspec(naked) __declspec(align(16))
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
                        uint8* dst_argb, int width) {
  __asm {
    push       esi
    // Arguments sit at esp + 4 (return address) + 4 (esi pushed above).
    mov        eax, [esp + 4 + 4]   // src_argb0
    mov        esi, [esp + 4 + 8]   // src_argb1
    mov        edx, [esp + 4 + 12]  // dst_argb
    mov        ecx, [esp + 4 + 16]  // width
    pcmpeqb    xmm7, xmm7       // generate constant 0x0001
    psrlw      xmm7, 15
    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
    psrlw      xmm6, 8
    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
    psllw      xmm5, 8
    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
    pslld      xmm4, 24

    sub        ecx, 1           // ecx = width - 1
    je         convertloop1     // only 1 pixel?
    jl         convertloop1b    // width <= 0: nothing to do

    // 1 pixel loop until destination pointer is aligned.
    // If dst_argb is not 4-byte aligned it never becomes 16-byte aligned and
    // the whole row is handled here.
  alignloop1:
    test       edx, 15          // aligned?
    je         alignloop1b
    movd       xmm3, [eax]
    lea        eax, [eax + 4]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movd       xmm2, [esi]      // _r_b
    pshufb     xmm3, kShuffleAlpha // alpha
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movd       xmm1, [esi]      // _a_g
    lea        esi, [esi + 4]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 1
    movd       [edx], xmm0
    lea        edx, [edx + 4]
    jge        alignloop1

  alignloop1b:
    add        ecx, 1 - 4       // rebias counter: remaining - 4
    jl         convertloop4b    // fewer than 4 pixels left

    // Pick the aligned-load loop only if both sources are 16-byte aligned.
    test       eax, 15          // unaligned?
    jne        convertuloop4
    test       esi, 15          // unaligned?
    jne        convertuloop4

    // 4 pixel loop.
  convertloop4:
    movdqa     xmm3, [eax]      // src argb
    lea        eax, [eax + 16]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movdqa     xmm2, [esi]      // _r_b
    pshufb     xmm3, kShuffleAlpha // alpha
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movdqa     xmm1, [esi]      // _a_g
    lea        esi, [esi + 16]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 4
    movdqa     [edx], xmm0      // dst is 16-byte aligned here
    lea        edx, [edx + 16]
    jge        convertloop4
    jmp        convertloop4b    // skip the unaligned loop

    // 4 pixel unaligned loop.
  convertuloop4:
    movdqu     xmm3, [eax]      // src argb
    lea        eax, [eax + 16]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movdqu     xmm2, [esi]      // _r_b
    pshufb     xmm3, kShuffleAlpha // alpha
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movdqu     xmm1, [esi]      // _a_g
    lea        esi, [esi + 16]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 4
    movdqa     [edx], xmm0      // dst is 16-byte aligned here
    lea        edx, [edx + 16]
    jge        convertuloop4

  convertloop4b:
    add        ecx, 4 - 1       // rebias counter: remaining - 1
    jl         convertloop1b    // no pixels left

    // 1 pixel loop.
  convertloop1:
    movd       xmm3, [eax]      // src argb
    lea        eax, [eax + 4]
    movdqa     xmm0, xmm3       // src argb
    pxor       xmm3, xmm4       // ~alpha
    movd       xmm2, [esi]      // _r_b
    pshufb     xmm3, kShuffleAlpha // alpha
    pand       xmm2, xmm6       // _r_b
    paddw      xmm3, xmm7       // 256 - alpha
    pmullw     xmm2, xmm3       // _r_b * (256 - alpha)
    movd       xmm1, [esi]      // _a_g
    lea        esi, [esi + 4]
    psrlw      xmm1, 8          // _a_g
    por        xmm0, xmm4       // set alpha to 255
    pmullw     xmm1, xmm3       // _a_g * (256 - alpha)
    psrlw      xmm2, 8          // _r_b convert to 8 bits again
    paddusb    xmm0, xmm2       // + src argb
    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
    paddusb    xmm0, xmm1       // + src argb
    sub        ecx, 1
    movd       [edx], xmm0
    lea        edx, [edx + 4]
    jge        convertloop1

  convertloop1b:
    pop        esi
    ret
  }
}
#endif  // HAS_ARGBBLENDROW_SSSE3

#ifdef HAS_ARGBATTENUATEROW_SSE2
// Attenuate 4 pixels at a time.
// Aligned to 16 bytes.
// Multiplies the three colour bytes of every ARGB pixel by that pixel's alpha
// and leaves the alpha byte unchanged.
//
// Each byte v is widened to the word v * 0x0101 (punpck with itself); the
// colour and alpha words are multiplied with pmulhuw (high 16 bits of the
// product) and shifted right by 8, which gives approximately c * a / 255.
//
// Loads and the store use movdqa, so src_argb and dst_argb must both be
// 16-byte aligned. The count is tested after the loop body ("sub ecx, 4 /
// jg"), so at least 4 pixels are always processed and width is effectively
// rounded up to a multiple of 4.
__declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // width
    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
    pslld      xmm4, 24
    pcmpeqb    xmm5, xmm5       // generate mask 0x00ffffff
    psrld      xmm5, 8

    align      4
  convertloop:
    movdqa     xmm0, [eax]      // read 4 pixels
    punpcklbw  xmm0, xmm0       // first 2 pixels, bytes doubled into words
    pshufhw    xmm2, xmm0, 0FFh // 8 alpha words
    pshuflw    xmm2, xmm2, 0FFh
    pmulhuw    xmm0, xmm2       // rgb * a
    movdqa     xmm1, [eax]      // read 4 pixels
    punpckhbw  xmm1, xmm1       // next 2 pixels, bytes doubled into words
    pshufhw    xmm2, xmm1, 0FFh // 8 alpha words
    pshuflw    xmm2, xmm2, 0FFh
    pmulhuw    xmm1, xmm2       // rgb * a
    movdqa     xmm2, [eax]      // alphas
    lea        eax, [eax + 16]
    psrlw      xmm0, 8
    pand       xmm2, xmm4       // isolate original alphas
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    pand       xmm0, xmm5       // clear the computed alpha bytes
    por        xmm0, xmm2       // insert original alphas
    sub        ecx, 4
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    jg         convertloop

    ret
  }
}
#endif  // HAS_ARGBATTENUATEROW_SSE2

#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha.
// pshufb controls for ARGBAttenuateRow_SSSE3. For each pixel the alpha byte
// is copied into both bytes of the three colour words (giving a * 0x0101) and
// the fourth word is zeroed (index 128 has the top bit set, which makes
// pshufb write 0). kShuffleAlpha0 covers pixels 0-1 (alpha bytes 3 and 7),
// kShuffleAlpha1 covers pixels 2-3 (alpha bytes 11 and 15).
static const uvec8 kShuffleAlpha0 = {
  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
};
static const uvec8 kShuffleAlpha1 = {
  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
};
// Multiplies the three colour bytes of every ARGB pixel by that pixel's alpha
// and leaves the alpha byte unchanged, 4 pixels per iteration.
//
// Colour bytes are widened to c * 0x0101 and multiplied with the shuffled
// alpha words using pmulhuw, then shifted right by 8 (approximately
// c * a / 255). The shuffle tables zero the alpha word, so the product's alpha
// byte is 0 and the original alpha can simply be OR-ed back in.
//
// Pixel loads and the store use movdqu, so src_argb and dst_argb need no
// particular alignment. The count is tested after the loop body ("sub ecx, 4
// / jg"), so at least 4 pixels are always processed and width is effectively
// rounded up to a multiple of 4.
__declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // width
    pcmpeqb    xmm3, xmm3       // generate mask 0xff000000
    pslld      xmm3, 24
    movdqa     xmm4, kShuffleAlpha0  // aligned load: assumes uvec8 is 16-byte aligned - TODO confirm
    movdqa     xmm5, kShuffleAlpha1

    align      4
  convertloop:
    movdqu     xmm0, [eax]      // read 4 pixels
    pshufb     xmm0, xmm4       // isolate first 2 alphas
    movdqu     xmm1, [eax]      // read 4 pixels
    punpcklbw  xmm1, xmm1       // first 2 pixel rgbs
    pmulhuw    xmm0, xmm1       // rgb * a
    movdqu     xmm1, [eax]      // read 4 pixels
    pshufb     xmm1, xmm5       // isolate next 2 alphas
    movdqu     xmm2, [eax]      // read 4 pixels
    punpckhbw  xmm2, xmm2       // next 2 pixel rgbs
    pmulhuw    xmm1, xmm2       // rgb * a
    movdqu     xmm2, [eax]      // mask original alpha
    lea        eax, [eax + 16]
    pand       xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    por        xmm0, xmm2       // copy original alpha
    sub        ecx, 4
    movdqu     [edx], xmm0
    lea        edx, [edx + 16]
    jg         convertloop

    ret
  }
}
#endif  // HAS_ARGBATTENUATEROW_SSSE3

#ifdef HAS_ARGBATTENUATEROW_AVX2
// Shuffle table duplicating alpha.
static const ulvec8 kShuffleAlpha_AVX2 = {
  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
};
__declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb0
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // width
    sub        edx, eax
    vmovdqa    ymm4, kShuffleAlpha_AVX2
    vpcmpeqb   ymm5, ymm5, ymm5 // generate mask 0xff000000
4954d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpslld ymm5, ymm5, 24 4955d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com 4956c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4957d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com convertloop: 4958d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vmovdqu ymm6, [eax] // read 8 pixels. 4959d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. 4960d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. 4961d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpshufb ymm2, ymm0, ymm4 // low 4 alphas 4962d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpshufb ymm3, ymm1, ymm4 // high 4 alphas 4963d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpmulhuw ymm0, ymm0, ymm2 // rgb * a 4964d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpmulhuw ymm1, ymm1, ymm3 // rgb * a 4965d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpand ymm6, ymm6, ymm5 // isolate alpha 4966d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpsrlw ymm0, ymm0, 8 4967d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpsrlw ymm1, ymm1, 8 4968d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // unmutated. 
4969d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vpor ymm0, ymm0, ymm6 // copy original alpha 4970d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com sub ecx, 8 4971d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com vmovdqu [eax + edx], ymm0 4972d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com lea eax, [eax + 32] 4973d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com jg convertloop 4974d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com 49759b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 4976d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com ret 4977d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com } 4978d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com} 4979d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com#endif // HAS_ARGBATTENUATEROW_AVX2 4980d5ee3dc9123c9fa4e90ec6b90a5c45f8434cac3ffbarchard@google.com 4981eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.com#ifdef HAS_ARGBUNATTENUATEROW_SSE2 4982810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com// Unattenuate 4 pixels at a time. 4983c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Aligned to 16 bytes. 
4984810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com__declspec(naked) __declspec(align(16)) 4985810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.comvoid ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, 4986810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com int width) { 4987810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com __asm { 4988810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com push esi 4989810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com push edi 4990810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb0 4991810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com mov edx, [esp + 8 + 8] // dst_argb 4992810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com mov ecx, [esp + 8 + 12] // width 4993810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com 4994c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 4995810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com convertloop: 499638157bdc719c403a620218bc2a35af0f9b4adc85fbarchard@google.com movdqu xmm0, [eax] // read 4 pixels 4997810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movzx esi, byte ptr [eax + 3] // first alpha 4998810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movzx edi, byte ptr [eax + 7] // second alpha 4999810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com punpcklbw xmm0, xmm0 // first 2 5000810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movd xmm2, dword ptr fixed_invtbl8[esi * 4] 5001810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movd xmm3, dword ptr fixed_invtbl8[edi * 4] 500298a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 
1, a, a, a 500398a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words 5004810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movlhps xmm2, xmm3 5005810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com pmulhuw xmm0, xmm2 // rgb * a 5006810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com 500738157bdc719c403a620218bc2a35af0f9b4adc85fbarchard@google.com movdqu xmm1, [eax] // read 4 pixels 5008810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movzx esi, byte ptr [eax + 11] // third alpha 5009810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movzx edi, byte ptr [eax + 15] // forth alpha 5010810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com punpckhbw xmm1, xmm1 // next 2 5011810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movd xmm2, dword ptr fixed_invtbl8[esi * 4] 5012810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movd xmm3, dword ptr fixed_invtbl8[edi * 4] 501398a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words 501498a1fbf5e9797112515d591b1262db6ae049b8fafbarchard@google.com pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words 5015810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com movlhps xmm2, xmm3 5016810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com pmulhuw xmm1, xmm2 // rgb * a 5017bb5ea8e4df7aba47d39a0b6e74ce7cc85e5c8c3afbarchard@google.com lea eax, [eax + 16] 5018810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com 5019810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com packuswb xmm0, xmm1 5020810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com sub ecx, 4 502138157bdc719c403a620218bc2a35af0f9b4adc85fbarchard@google.com movdqu [edx], xmm0 5022bb5ea8e4df7aba47d39a0b6e74ce7cc85e5c8c3afbarchard@google.com lea edx, [edx + 16] 5023810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com jg convertloop 
5024810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com pop edi 5025810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com pop esi 5026810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com ret 5027810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com } 5028810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com} 5029eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.com#endif // HAS_ARGBUNATTENUATEROW_SSE2 5030810cd91079505f04cfec7481b51d04f08250d982fbarchard@google.com 50313c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com#ifdef HAS_ARGBUNATTENUATEROW_AVX2 50323c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com// Shuffle table duplicating alpha. 5033851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const ulvec8 kUnattenShuffleAlpha_AVX2 = { 5034787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, 5035787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, 50363c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com}; 5037805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com// TODO(fbarchard): Enable USE_GATHER for future hardware if faster. 5038805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com// USE_GATHER is not on by default, due to being a slow instruction. 
5039805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com#ifdef USE_GATHER 50403c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com__declspec(naked) __declspec(align(16)) 50413c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.comvoid ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, 50423c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com int width) { 50433c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com __asm { 50443c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com mov eax, [esp + 4] // src_argb0 50453c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com mov edx, [esp + 8] // dst_argb 50463c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com mov ecx, [esp + 12] // width 50473c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com sub edx, eax 50483c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vmovdqa ymm4, kUnattenShuffleAlpha_AVX2 50493c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com 5050c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 50513c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com convertloop: 50523c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vmovdqu ymm6, [eax] // read 8 pixels. 5053787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather. 50543c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpsrld ymm2, ymm6, 24 // alpha in low 8 bits. 50553c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. 50563c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. 5057787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a 5058787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. 
mutated. 1, 1, a, a 5059787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated. 5060787f82766394fc13ff99bb68308c922c014a6f1dfbarchard@google.com vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas. 1, a, a, a 50613c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas 50623c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpmulhuw ymm0, ymm0, ymm2 // rgb * ia 50633c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpmulhuw ymm1, ymm1, ymm3 // rgb * ia 50643c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // unmutated. 50653c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com sub ecx, 8 50663c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com vmovdqu [eax + edx], ymm0 50673c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com lea eax, [eax + 32] 50683c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com jg convertloop 50693c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com 50709b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 50713c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com ret 50723c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com } 50733c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com} 5074805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com#else // USE_GATHER 5075805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com__declspec(naked) __declspec(align(16)) 5076805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.comvoid ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, 5077805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com int width) { 5078805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com __asm { 5079805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com 5080805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com mov eax, [esp + 
4] // src_argb0 5081805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com mov edx, [esp + 8] // dst_argb 5082805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com mov ecx, [esp + 12] // width 5083805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com sub edx, eax 5084805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovdqa ymm5, kUnattenShuffleAlpha_AVX2 5085805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com 5086805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com push esi 5087805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com push edi 5088805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com 5089c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5090805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com convertloop: 5091805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com // replace VPGATHER 50929b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx esi, byte ptr [eax + 3] // alpha0 50939b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx edi, byte ptr [eax + 7] // alpha1 5094805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a0] 5095805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a1] 50969b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx esi, byte ptr [eax + 11] // alpha2 50979b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx edi, byte ptr [eax + 15] // alpha3 5098805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] 5099805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a2] 5100805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a3] 51019b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx esi, byte ptr 
[eax + 19] // alpha4 51029b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx edi, byte ptr [eax + 23] // alpha5 5103805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] 5104805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a4] 5105805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a5] 51069b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx esi, byte ptr [eax + 27] // alpha6 51079b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com movzx edi, byte ptr [eax + 31] // alpha7 5108805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] 5109805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a6] 5110805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a7] 5111805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] 5112805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] 5113805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] 5114805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0] 5115805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com // end of VPGATHER 5116805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com 5117805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovdqu ymm6, [eax] // read 8 pixels. 5118805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. 
5119805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. 5120805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a 5121805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated. 5122805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpshufb ymm2, ymm2, ymm5 // replicate low 4 alphas. 1, a, a, a 5123805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas 5124805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpmulhuw ymm0, ymm0, ymm2 // rgb * ia 5125805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpmulhuw ymm1, ymm1, ymm3 // rgb * ia 5126805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vpackuswb ymm0, ymm0, ymm1 // unmutated. 5127805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com sub ecx, 8 5128805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com vmovdqu [eax + edx], ymm0 5129805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com lea eax, [eax + 32] 5130805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com jg convertloop 5131805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com 5132805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com pop edi 5133805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com pop esi 51349b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 5135805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com ret 5136805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com } 5137805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com} 5138805fefb9d87bfbe00ae435d779e867c48c10d530fbarchard@google.com#endif // USE_GATHER 51393c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com#endif // HAS_ARGBATTENUATEROW_AVX2 
51403c7bb050bd54264f360ced29c1cd2777483bd6f0fbarchard@google.com 5141ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com#ifdef HAS_ARGBGRAYROW_SSSE3 5142c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. 5143ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com__declspec(naked) __declspec(align(16)) 5144eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.comvoid ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { 5145ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com __asm { 5146eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.com mov eax, [esp + 4] /* src_argb */ 5147eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.com mov edx, [esp + 8] /* dst_argb */ 5148eeac2903ef22110d475c50ef9bfd7826d3183a5efbarchard@google.com mov ecx, [esp + 12] /* width */ 5149050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm4, kARGBToYJ 5150050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com movdqa xmm5, kAddYJ64 5151ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com 5152c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5153ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com convertloop: 5154221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm0, [eax] // G 5155ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com movdqa xmm1, [eax + 16] 5156ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com pmaddubsw xmm0, xmm4 5157ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com pmaddubsw xmm1, xmm4 5158ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com phaddw xmm0, xmm1 5159050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com paddw xmm0, xmm5 // Add .5 for rounding. 
5160ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com psrlw xmm0, 7 5161221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm0, xmm0 // 8 G bytes 5162221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm2, [eax] // A 5163221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm3, [eax + 16] 5164b8ffdc9e574a1552955dbb62369082c7a475e9fafbarchard@google.com lea eax, [eax + 32] 5165221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrld xmm2, 24 5166221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrld xmm3, 24 5167221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm2, xmm3 5168221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm2, xmm2 // 8 A bytes 5169221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA 5170221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklbw xmm0, xmm0 // 8 GG words 5171221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklbw xmm3, xmm2 // 8 GA words 5172ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com movdqa xmm1, xmm0 5173221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklwd xmm0, xmm3 // GGGA first 4 5174221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpckhwd xmm1, xmm3 // GGGA next 4 5175ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com sub ecx, 8 5176b8ffdc9e574a1552955dbb62369082c7a475e9fafbarchard@google.com movdqa [edx], xmm0 5177b8ffdc9e574a1552955dbb62369082c7a475e9fafbarchard@google.com movdqa [edx + 16], xmm1 5178b8ffdc9e574a1552955dbb62369082c7a475e9fafbarchard@google.com lea edx, [edx + 32] 5179ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com jg convertloop 5180ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com ret 5181ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com } 5182ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com} 
5183ffaea7eee38e593a3e63553ffa90e554ba81fe30fbarchard@google.com#endif // HAS_ARGBGRAYROW_SSSE3 5184221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 5185221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com#ifdef HAS_ARGBSEPIAROW_SSSE3 5186221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com// b = (r * 35 + g * 68 + b * 17) >> 7 5187221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com// g = (r * 45 + g * 88 + b * 22) >> 7 5188221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com// r = (r * 50 + g * 98 + b * 24) >> 7 5189c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Constant for ARGB color to sepia tone. 5190851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToSepiaB = { 5191221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 5192221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com}; 5193221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 5194851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToSepiaG = { 5195221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0 5196221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com}; 5197221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 5198851a702b39aefb717990a0578d8701d051cbab32fbarchard@google.comstatic const vec8 kARGBToSepiaR = { 5199221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0 5200221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com}; 5201221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 5202e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
5203221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com__declspec(naked) __declspec(align(16)) 5204221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.comvoid ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { 5205221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com __asm { 5206221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com mov eax, [esp + 4] /* dst_argb */ 5207221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com mov ecx, [esp + 8] /* width */ 5208221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm2, kARGBToSepiaB 5209221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm3, kARGBToSepiaG 5210221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm4, kARGBToSepiaR 5211221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 5212c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5213221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com convertloop: 5214221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm0, [eax] // B 5215221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm6, [eax + 16] 5216221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm0, xmm2 5217221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm6, xmm2 5218221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com phaddw xmm0, xmm6 5219221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrlw xmm0, 7 5220221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm0, xmm0 // 8 B values 5221221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm5, [eax] // G 5222221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm1, [eax + 16] 5223221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm5, xmm3 5224221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm1, xmm3 
5225221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com phaddw xmm5, xmm1 5226221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrlw xmm5, 7 5227221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm5, xmm5 // 8 G values 5228221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklbw xmm0, xmm5 // 8 BG values 5229221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm5, [eax] // R 5230221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm1, [eax + 16] 5231221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm5, xmm4 5232221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com pmaddubsw xmm1, xmm4 5233221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com phaddw xmm5, xmm1 5234221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrlw xmm5, 7 5235221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm5, xmm5 // 8 R values 5236221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm6, [eax] // A 5237221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm1, [eax + 16] 5238221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrld xmm6, 24 5239221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com psrld xmm1, 24 5240221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm6, xmm1 5241221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com packuswb xmm6, xmm6 // 8 A values 5242221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklbw xmm5, xmm6 // 8 RA values 5243221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa xmm1, xmm0 // Weave BG, RA together 5244221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpcklwd xmm0, xmm5 // BGRA first 4 5245221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com punpckhwd xmm1, xmm5 // BGRA next 4 5246221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com sub ecx, 8 
5247221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa [eax], xmm0 5248221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com movdqa [eax + 16], xmm1 5249221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com lea eax, [eax + 32] 5250221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com jg convertloop 5251221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com ret 5252221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com } 5253221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com} 5254221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com#endif // HAS_ARGBSEPIAROW_SSSE3 525581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com 5256e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3 5257e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com// Tranform 8 ARGB pixels (32 bytes) with color matrix. 5258e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com// Same as Sepia except matrix is provided. 525964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R 5260e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. 
5261e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com__declspec(naked) __declspec(align(16)) 5262c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.comvoid ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 5263c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com const int8* matrix_argb, int width) { 5264e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com __asm { 5265c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com mov eax, [esp + 4] /* src_argb */ 5266c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com mov edx, [esp + 8] /* dst_argb */ 5267c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com mov ecx, [esp + 12] /* matrix_argb */ 52684a4b7374c12598560904609c91059003b57bc3d4fbarchard@google.com movdqu xmm5, [ecx] 52694a4b7374c12598560904609c91059003b57bc3d4fbarchard@google.com pshufd xmm2, xmm5, 0x00 52704a4b7374c12598560904609c91059003b57bc3d4fbarchard@google.com pshufd xmm3, xmm5, 0x55 52714a4b7374c12598560904609c91059003b57bc3d4fbarchard@google.com pshufd xmm4, xmm5, 0xaa 52724a4b7374c12598560904609c91059003b57bc3d4fbarchard@google.com pshufd xmm5, xmm5, 0xff 5273c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com mov ecx, [esp + 16] /* width */ 5274e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com 527511a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com align 4 5276e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com convertloop: 5277e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com movdqa xmm0, [eax] // B 5278c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm7, [eax + 16] 5279e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com pmaddubsw xmm0, xmm2 5280c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com pmaddubsw xmm7, xmm2 5281c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm6, [eax] // G 5282e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com movdqa xmm1, [eax + 16] 
5283c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com pmaddubsw xmm6, xmm3 5284e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com pmaddubsw xmm1, xmm3 5285c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com phaddsw xmm0, xmm7 // B 5286c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com phaddsw xmm6, xmm1 // G 5287c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com psraw xmm0, 6 // B 5288c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com psraw xmm6, 6 // G 52898f439eac1dc6352c214d3797a2af5cee80ead300fbarchard@google.com packuswb xmm0, xmm0 // 8 B values 5290c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com packuswb xmm6, xmm6 // 8 G values 5291c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com punpcklbw xmm0, xmm6 // 8 BG values 5292c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm1, [eax] // R 5293c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm7, [eax + 16] 5294e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com pmaddubsw xmm1, xmm4 5295c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com pmaddubsw xmm7, xmm4 5296c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com phaddsw xmm1, xmm7 // R 5297e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com movdqa xmm6, [eax] // A 5298c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm7, [eax + 16] 5299c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com pmaddubsw xmm6, xmm5 5300c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com pmaddubsw xmm7, xmm5 5301c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com phaddsw xmm6, xmm7 // A 5302c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com psraw xmm1, 6 // R 5303c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com psraw xmm6, 6 // A 5304c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com packuswb xmm1, xmm1 // 8 R values 
5305e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com packuswb xmm6, xmm6 // 8 A values 5306c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com punpcklbw xmm1, xmm6 // 8 RA values 5307c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa xmm6, xmm0 // Weave BG, RA together 5308c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com punpcklwd xmm0, xmm1 // BGRA first 4 5309c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com punpckhwd xmm6, xmm1 // BGRA next 4 5310e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com sub ecx, 8 5311c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa [edx], xmm0 5312c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com movdqa [edx + 16], xmm6 5313e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com lea eax, [eax + 32] 5314c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com lea edx, [edx + 32] 5315e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com jg convertloop 5316e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com ret 5317e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com } 5318e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com} 5319e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com#endif // HAS_ARGBCOLORMATRIXROW_SSSE3 5320e442dc4c2a896e85419628e3b7d97c4dfbe71c9dfbarchard@google.com 532181b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com#ifdef HAS_ARGBQUANTIZEROW_SSE2 532281b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com// Quantize 4 ARGB pixels (16 bytes). 5323c4c578e327a9dbc9dafe113634612e9a349a8c1ffbarchard@google.com// Aligned to 16 bytes. 
532481b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com__declspec(naked) __declspec(align(16)) 532581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.comvoid ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, 532681b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com int interval_offset, int width) { 532781b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com __asm { 532881b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com mov eax, [esp + 4] /* dst_argb */ 532981b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movd xmm2, [esp + 8] /* scale */ 533081b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movd xmm3, [esp + 12] /* interval_size */ 533181b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movd xmm4, [esp + 16] /* interval_offset */ 533281b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com mov ecx, [esp + 20] /* width */ 533381b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshuflw xmm2, xmm2, 040h 533481b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshufd xmm2, xmm2, 044h 533581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshuflw xmm3, xmm3, 040h 533681b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshufd xmm3, xmm3, 044h 533781b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshuflw xmm4, xmm4, 040h 533881b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pshufd xmm4, xmm4, 044h 533981b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pxor xmm5, xmm5 // constant 0 534081b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pcmpeqb xmm6, xmm6 // generate mask 0xff000000 534181b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pslld xmm6, 24 534281b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com 5343c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 534481b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com convertloop: 
534581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movdqa xmm0, [eax] // read 4 pixels 534681b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com punpcklbw xmm0, xmm5 // first 2 pixels 534781b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pmulhuw xmm0, xmm2 // pixel * scale >> 16 534881b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movdqa xmm1, [eax] // read 4 pixels 534981b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com punpckhbw xmm1, xmm5 // next 2 pixels 535081b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pmulhuw xmm1, xmm2 535181b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pmullw xmm0, xmm3 // * interval_size 535281b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movdqa xmm7, [eax] // read 4 pixels 535381b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pmullw xmm1, xmm3 535481b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com pand xmm7, xmm6 // mask alpha 535581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com paddw xmm0, xmm4 // + interval_size / 2 535681b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com paddw xmm1, xmm4 535781b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com packuswb xmm0, xmm1 535881b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com por xmm0, xmm7 535981b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com sub ecx, 4 536081b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com movdqa [eax], xmm0 536181b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com lea eax, [eax + 16] 536281b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com jg convertloop 536381b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com ret 536481b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com } 536581b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com} 536681b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com#endif // HAS_ARGBQUANTIZEROW_SSE2 
536781b804e35c0346ee2fc5f8d11945eab9a88fdb10fbarchard@google.com 5368b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com#ifdef HAS_ARGBSHADEROW_SSE2 5369b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Shade 4 pixels at a time by specified value. 5370b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Aligned to 16 bytes. 5371b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com__declspec(naked) __declspec(align(16)) 5372b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.comvoid ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, 5373b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com uint32 value) { 5374b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com __asm { 5375b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com mov eax, [esp + 4] // src_argb 5376b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com mov edx, [esp + 8] // dst_argb 5377b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com mov ecx, [esp + 12] // width 5378b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com movd xmm2, [esp + 16] // value 5379b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com punpcklbw xmm2, xmm2 5380b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com punpcklqdq xmm2, xmm2 5381b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 5382c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5383b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com convertloop: 5384b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com movdqa xmm0, [eax] // read 4 pixels 5385abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 16] 5386b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com movdqa xmm1, xmm0 5387b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com punpcklbw xmm0, xmm0 // first 2 5388b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com punpckhbw xmm1, xmm1 // next 2 
5389b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com pmulhuw xmm0, xmm2 // argb * value 5390b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com pmulhuw xmm1, xmm2 // argb * value 5391b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com psrlw xmm0, 8 5392b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com psrlw xmm1, 8 5393b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com packuswb xmm0, xmm1 5394b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com sub ecx, 4 5395abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqa [edx], xmm0 5396abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 16] 5397b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com jg convertloop 5398b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 5399b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com ret 5400b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com } 5401b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com} 5402b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com#endif // HAS_ARGBSHADEROW_SSE2 5403b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 5404b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com#ifdef HAS_ARGBMULTIPLYROW_SSE2 540583e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com// Multiply 2 rows of ARGB pixels together, 4 pixels at a time. 
5406b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com__declspec(naked) __declspec(align(16)) 54078fa76349948802d728dd244a7b54051d751d8696fbarchard@google.comvoid ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, 54088fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com uint8* dst_argb, int width) { 5409b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com __asm { 54108fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com push esi 54118fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 54128fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 54138fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 54148fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com mov ecx, [esp + 4 + 16] // width 5415b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com pxor xmm5, xmm5 // constant 0 5416b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com 5417c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5418b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com convertloop: 5419bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movdqu xmm0, [eax] // read 4 pixels from src_argb0 5420abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu xmm2, [esi] // read 4 pixels from src_argb1 5421bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movdqu xmm1, xmm0 5422bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movdqu xmm3, xmm2 5423abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com punpcklbw xmm0, xmm0 // first 2 5424abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com punpckhbw xmm1, xmm1 // next 2 5425abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com punpcklbw xmm2, xmm5 // first 2 5426abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com punpckhbw xmm3, xmm5 // next 2 
5427abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2 5428abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2 5429abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 16] 5430abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 16] 5431b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com packuswb xmm0, xmm1 5432b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com sub ecx, 4 5433abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu [edx], xmm0 5434abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 16] 5435b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com jg convertloop 5436b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com 54378fa76349948802d728dd244a7b54051d751d8696fbarchard@google.com pop esi 5438b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com ret 5439b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com } 5440b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com} 5441b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com#endif // HAS_ARGBMULTIPLYROW_SSE2 5442b3c1a3fe796a6caf2a0bc8d0882bc65514d9b74dfbarchard@google.com 544383e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com#ifdef HAS_ARGBADDROW_SSE2 544483e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com// Add 2 rows of ARGB pixels together, 4 pixels at a time. 5445bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com// TODO(fbarchard): Port this to posix, neon and other math functions. 
544683e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com__declspec(naked) __declspec(align(16)) 544783e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.comvoid ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, 544883e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com uint8* dst_argb, int width) { 544983e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com __asm { 545083e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com push esi 545183e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 545283e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 545383e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 545483e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com mov ecx, [esp + 4 + 16] // width 545583e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com 5456bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com sub ecx, 4 5457bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com jl convertloop49 5458bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com 5459c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5460bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com convertloop4: 5461bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movdqu xmm0, [eax] // read 4 pixels from src_argb0 5462abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 16] 5463abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu xmm1, [esi] // read 4 pixels from src_argb1 5464abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 16] 546583e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com paddusb xmm0, xmm1 // src_argb0 + src_argb1 546683e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com sub ecx, 4 5467abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu [edx], xmm0 
5468abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 16] 5469bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com jge convertloop4 5470bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com 5471bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com convertloop49: 5472bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com add ecx, 4 - 1 5473bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com jl convertloop19 547483e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com 5475bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com convertloop1: 5476bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movd xmm0, [eax] // read 1 pixels from src_argb0 5477abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 4] 5478abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movd xmm1, [esi] // read 1 pixels from src_argb1 5479abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 4] 5480bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com paddusb xmm0, xmm1 // src_argb0 + src_argb1 5481bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com sub ecx, 1 5482abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movd [edx], xmm0 5483abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 4] 5484bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com jge convertloop1 5485bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com 5486bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com convertloop19: 548783e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com pop esi 548883e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com ret 548983e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com } 549083e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com} 549183e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com#endif // HAS_ARGBADDROW_SSE2 
549283e1b17cc0b1840c7b5e361fa19e7263fca2b32bfbarchard@google.com 5493573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com#ifdef HAS_ARGBSUBTRACTROW_SSE2 5494573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com// Subtract 2 rows of ARGB pixels together, 4 pixels at a time. 5495573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com__declspec(naked) __declspec(align(16)) 5496573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.comvoid ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, 5497573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com uint8* dst_argb, int width) { 5498573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com __asm { 5499573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com push esi 5500573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 5501573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 5502573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 5503573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com mov ecx, [esp + 4 + 16] // width 5504573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com 5505c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5506573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com convertloop: 5507bb92acade0cd17a83ad32177da6a449b2962066efbarchard@google.com movdqu xmm0, [eax] // read 4 pixels from src_argb0 5508abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 16] 5509abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu xmm1, [esi] // read 4 pixels from src_argb1 5510abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 16] 5511573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com psubusb xmm0, xmm1 // src_argb0 - src_argb1 5512573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com sub ecx, 4 
5513abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com movdqu [edx], xmm0 5514abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 16] 5515573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com jg convertloop 5516573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com 5517573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com pop esi 5518573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com ret 5519573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com } 5520573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com} 5521573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com#endif // HAS_ARGBSUBTRACTROW_SSE2 5522573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com 552351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#ifdef HAS_ARGBMULTIPLYROW_AVX2 552451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. 552551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com__declspec(naked) __declspec(align(16)) 552651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.comvoid ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, 552751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com uint8* dst_argb, int width) { 552851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com __asm { 552951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com push esi 553051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 553151d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 553251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 553351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov ecx, [esp + 4 + 16] // width 5534abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vpxor ymm5, ymm5, ymm5 // constant 0 
553551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 5536c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 553751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com convertloop: 553851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vmovdqu ymm1, [eax] // read 8 pixels from src_argb0 5539abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea eax, [eax + 32] 5540abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vmovdqu ymm3, [esi] // read 8 pixels from src_argb1 5541abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 32] 554251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpunpcklbw ymm0, ymm1, ymm1 // low 4 554351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpunpckhbw ymm1, ymm1, ymm1 // high 4 554451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpunpcklbw ymm2, ymm3, ymm5 // low 4 554551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpunpckhbw ymm3, ymm3, ymm5 // high 4 554651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4 554751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4 554851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vpackuswb ymm0, ymm0, ymm1 5549abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vmovdqu [edx], ymm0 5550abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 32] 5551fc264019de6bf46f284907f966f7947cc51887e5fbarchard@google.com sub ecx, 8 555251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com jg convertloop 555351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 555451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com pop esi 55559b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 555651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com ret 
555751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com } 555851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com} 555951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#endif // HAS_ARGBMULTIPLYROW_AVX2 556051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 556151d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#ifdef HAS_ARGBADDROW_AVX2 556251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com// Add 2 rows of ARGB pixels together, 8 pixels at a time. 556351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com__declspec(naked) __declspec(align(16)) 556451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.comvoid ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, 556551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com uint8* dst_argb, int width) { 556651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com __asm { 556751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com push esi 556851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 556951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 557051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 557151d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov ecx, [esp + 4 + 16] // width 557251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 5573c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 557451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com convertloop: 557551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 557651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com lea eax, [eax + 32] 5577abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1 5578abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com 
lea esi, [esi + 32] 5579abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vmovdqu [edx], ymm0 5580abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 32] 5581fc264019de6bf46f284907f966f7947cc51887e5fbarchard@google.com sub ecx, 8 558251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com jg convertloop 558351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 558451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com pop esi 55859b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 558651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com ret 558751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com } 558851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com} 558951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#endif // HAS_ARGBADDROW_AVX2 559051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 559151d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#ifdef HAS_ARGBSUBTRACTROW_AVX2 559251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com// Subtract 2 rows of ARGB pixels together, 8 pixels at a time. 
559351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com__declspec(naked) __declspec(align(16)) 559451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.comvoid ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, 559551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com uint8* dst_argb, int width) { 559651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com __asm { 559751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com push esi 559851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov eax, [esp + 4 + 4] // src_argb0 559951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov esi, [esp + 4 + 8] // src_argb1 560051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 560151d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com mov ecx, [esp + 4 + 16] // width 560251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 5603c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 560451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com convertloop: 560551d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 560651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com lea eax, [eax + 32] 5607abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1 5608abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea esi, [esi + 32] 5609abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com vmovdqu [edx], ymm0 5610abfeea9b81084185b0d3abc8fe5b5c4f496a0c18fbarchard@google.com lea edx, [edx + 32] 5611fc264019de6bf46f284907f966f7947cc51887e5fbarchard@google.com sub ecx, 8 561251d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com jg convertloop 561351d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 561451d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com pop esi 
56159b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 561651d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com ret 561751d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com } 561851d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com} 561951d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com#endif // HAS_ARGBSUBTRACTROW_AVX2 562051d3e236cb5923c60ba818b6e825c2658b565afefbarchard@google.com 5621092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com#ifdef HAS_SOBELXROW_SSE2 5622e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// SobelX as a matrix is 5623e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// -1 0 1 5624e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// -2 0 2 5625e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// -1 0 1 5626e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com__declspec(naked) __declspec(align(16)) 5627092099507e44e9f429ec52956a20b28db634b910fbarchard@google.comvoid SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, 5628092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com const uint8* src_y2, uint8* dst_sobelx, int width) { 5629e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com __asm { 5630e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com push esi 5631e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com push edi 5632e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov eax, [esp + 8 + 4] // src_y0 5633e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov esi, [esp + 8 + 8] // src_y1 5634e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov edi, [esp + 8 + 12] // src_y2 5635e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov edx, [esp + 8 + 16] // dst_sobelx 5636e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov ecx, [esp + 8 + 20] // width 5637e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub esi, eax 
5638e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub edi, eax 5639e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub edx, eax 5640e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com pxor xmm5, xmm5 // constant 0 5641e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5642c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5643e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com convertloop: 5644e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] 5645e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2] 5646e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm0, xmm5 5647e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm1, xmm5 5648e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw xmm0, xmm1 5649e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] 5650e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] 5651e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm1, xmm5 5652e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm2, xmm5 5653e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw xmm1, xmm2 5654e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0] 5655e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2] 5656e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm2, xmm5 5657e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm3, xmm5 5658e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw 
xmm2, xmm3 5659e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm2 5660e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm1 5661e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm1 5662092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw 5663092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com psubw xmm1, xmm0 5664092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com pmaxsw xmm0, xmm1 5665e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com packuswb xmm0, xmm0 5666e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub ecx, 8 5667e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq qword ptr [eax + edx], xmm0 5668e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com lea eax, [eax + 8] 5669e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com jg convertloop 5670e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5671e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com pop edi 5672e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com pop esi 5673e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com ret 5674e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com } 5675e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com} 5676092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com#endif // HAS_SOBELXROW_SSE2 5677e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5678092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com#ifdef HAS_SOBELYROW_SSE2 5679e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// SobelY as a matrix is 5680e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// -1 -2 -1 5681e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// 0 0 0 5682e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com// 1 2 1 
5683e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com__declspec(naked) __declspec(align(16)) 5684092099507e44e9f429ec52956a20b28db634b910fbarchard@google.comvoid SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, 5685092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com uint8* dst_sobely, int width) { 5686e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com __asm { 5687e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com push esi 5688e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov eax, [esp + 4 + 4] // src_y0 5689e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov esi, [esp + 4 + 8] // src_y1 5690e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov edx, [esp + 4 + 12] // dst_sobely 5691e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com mov ecx, [esp + 4 + 16] // width 5692e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub esi, eax 5693e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub edx, eax 5694e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com pxor xmm5, xmm5 // constant 0 5695e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5696c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5697e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com convertloop: 5698e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] 5699e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] 5700e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm0, xmm5 5701e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm1, xmm5 5702e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw xmm0, xmm1 5703e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1] 
5704e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1] 5705e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm1, xmm5 5706e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm2, xmm5 5707e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw xmm1, xmm2 5708e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2] 5709e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] 5710e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm2, xmm5 5711e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com punpcklbw xmm3, xmm5 5712e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com psubw xmm2, xmm3 5713e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm2 5714e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm1 5715e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com paddw xmm0, xmm1 5716092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). 
SSSE3 could use pabsw 5717092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com psubw xmm1, xmm0 5718092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com pmaxsw xmm0, xmm1 5719e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com packuswb xmm0, xmm0 5720e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com sub ecx, 8 5721e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com movq qword ptr [eax + edx], xmm0 5722e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com lea eax, [eax + 8] 5723e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com jg convertloop 5724e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5725e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com pop esi 5726e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com ret 5727e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com } 5728e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com} 5729092099507e44e9f429ec52956a20b28db634b910fbarchard@google.com#endif // HAS_SOBELYROW_SSE2 5730e1247eec9498c870a3d24cf9f70472a2fbb9825efbarchard@google.com 5731c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com#ifdef HAS_SOBELROW_SSE2 5732c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
5733c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com// A = 255 5734c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com// R = Sobel 5735c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com// G = Sobel 5736c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com// B = Sobel 5737c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com__declspec(naked) __declspec(align(16)) 5738c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.comvoid SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 57398be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com uint8* dst_argb, int width) { 5740c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com __asm { 5741c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com push esi 5742c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com mov eax, [esp + 4 + 4] // src_sobelx 5743c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com mov esi, [esp + 4 + 8] // src_sobely 5744c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 5745c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com mov ecx, [esp + 4 + 16] // width 5746c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com sub esi, eax 5747c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com pcmpeqb xmm5, xmm5 // alpha 255 5748c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com pslld xmm5, 24 // 0xff000000 5749c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com 5750c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5751c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com convertloop: 5752c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa xmm0, [eax] // read 16 pixels src_sobelx 5753c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa xmm1, [eax + esi] // read 16 pixels src_sobely 5754c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com lea eax, [eax + 16] 
5755c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com paddusb xmm0, xmm1 // sobel = sobelx + sobely 5756c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa xmm2, xmm0 // GG 5757c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpcklbw xmm2, xmm0 // First 8 5758c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpckhbw xmm0, xmm0 // Next 8 5759c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa xmm1, xmm2 // GGGG 5760c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpcklwd xmm1, xmm2 // First 4 5761c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpckhwd xmm2, xmm2 // Next 4 5762c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com por xmm1, xmm5 // GGGA 5763c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com por xmm2, xmm5 5764c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa xmm3, xmm0 // GGGG 5765c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpcklwd xmm3, xmm0 // Next 4 5766c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com punpckhwd xmm0, xmm0 // Last 4 5767c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com por xmm3, xmm5 // GGGA 5768c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com por xmm0, xmm5 5769c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com sub ecx, 16 5770c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa [edx], xmm1 5771c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa [edx + 16], xmm2 5772c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa [edx + 32], xmm3 5773c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com movdqa [edx + 48], xmm0 5774c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com lea edx, [edx + 64] 5775c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com jg convertloop 5776c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com 
5777c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com pop esi 5778c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com ret 5779c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com } 5780c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com} 5781c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com#endif // HAS_SOBELROW_SSE2 5782c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com 57838be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com#ifdef HAS_SOBELTOPLANEROW_SSE2 57848be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into a plane. 57858be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com__declspec(naked) __declspec(align(16)) 57868be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.comvoid SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 57878be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com uint8* dst_y, int width) { 57888be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com __asm { 57898be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com push esi 57908be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com mov eax, [esp + 4 + 4] // src_sobelx 57918be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com mov esi, [esp + 4 + 8] // src_sobely 57928be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 57938be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com mov ecx, [esp + 4 + 16] // width 57948be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com sub esi, eax 57958be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com 5796c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 57978be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com convertloop: 57988be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com movdqa xmm0, [eax] // read 16 pixels src_sobelx 
57998be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com movdqa xmm1, [eax + esi] // read 16 pixels src_sobely 58008be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com lea eax, [eax + 16] 58018be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com paddusb xmm0, xmm1 // sobel = sobelx + sobely 58028be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com sub ecx, 16 58038be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com movdqa [edx], xmm0 58048be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com lea edx, [edx + 16] 58058be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com jg convertloop 58068be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com 58078be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com pop esi 58088be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com ret 58098be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com } 58108be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com} 58118be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com#endif // HAS_SOBELTOPLANEROW_SSE2 58128be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com 5813610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com#ifdef HAS_SOBELXYROW_SSE2 5814610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com// Mixes Sobel X, Sobel Y and Sobel into ARGB. 
5815610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com// A = 255 5816610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com// R = Sobel X 5817610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com// G = Sobel 5818610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com// B = Sobel Y 5819610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com__declspec(naked) __declspec(align(16)) 5820610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.comvoid SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, 5821610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com uint8* dst_argb, int width) { 5822610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com __asm { 5823610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com push esi 5824610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com mov eax, [esp + 4 + 4] // src_sobelx 5825610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com mov esi, [esp + 4 + 8] // src_sobely 5826610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com mov edx, [esp + 4 + 12] // dst_argb 5827610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com mov ecx, [esp + 4 + 16] // width 5828610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com sub esi, eax 5829191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com pcmpeqb xmm5, xmm5 // alpha 255 5830610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com 5831c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 5832610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com convertloop: 5833610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm0, [eax] // read 16 pixels src_sobelx 5834610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm1, [eax + esi] // read 16 pixels src_sobely 5835610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com lea eax, [eax + 16] 5836610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm2, xmm0 
5837610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com paddusb xmm2, xmm1 // sobel = sobelx + sobely 5838610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm3, xmm0 // XA 5839610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpcklbw xmm3, xmm5 5840610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpckhbw xmm0, xmm5 5841610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm4, xmm1 // YS 5842610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpcklbw xmm4, xmm2 5843610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpckhbw xmm1, xmm2 5844610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm6, xmm4 // YSXA 5845610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpcklwd xmm6, xmm3 // First 4 5846610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpckhwd xmm4, xmm3 // Next 4 5847610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa xmm7, xmm1 // YSXA 5848610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpcklwd xmm7, xmm0 // Next 4 5849610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com punpckhwd xmm1, xmm0 // Last 4 5850610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com sub ecx, 16 5851610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa [edx], xmm6 5852610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa [edx + 16], xmm4 5853610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa [edx + 32], xmm7 5854610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com movdqa [edx + 48], xmm1 5855610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com lea edx, [edx + 64] 5856610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com jg convertloop 5857610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com 5858610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com pop esi 5859610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com ret 
5860610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com } 5861610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com} 5862c93a137671e2e281dfb8d32561fed95caacf608bfbarchard@google.com#endif // HAS_SOBELXYROW_SSE2 5863610e012d56b1cce420369b82335bd178f7e39397fbarchard@google.com 5864f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 5865f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Consider float CumulativeSum. 5866f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Consider calling CumulativeSum one row at time as needed. 5867f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Consider circular CumulativeSum buffer of radius * 2 + 1 height. 5868f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Convert cumulative sum for an area to an average for 1 pixel. 5869f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// topleft is pointer to top left of CumulativeSum buffer for area. 5870f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// botleft is pointer to bottom left of CumulativeSum buffer. 5871f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// width is offset from left to right of area in CumulativeSum buffer measured 5872f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// in number of ints. 5873f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// area is the number of pixels in the area being averaged. 5874f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// dst points to pixel to store result to. 5875f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// count is number of averaged pixels to produce. 5876f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte 5877f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// aligned. 
5878f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, 5879f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int width, int area, uint8* dst, 5880f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int count) { 5881f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com __asm { 5882f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov eax, topleft // eax topleft 5883f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov esi, botleft // esi botleft 5884f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov edx, width 5885c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com movd xmm5, area 5886f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov edi, dst 5887f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov ecx, count 5888c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com cvtdq2ps xmm5, xmm5 5889c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com rcpss xmm4, xmm5 // 1.0f / area 5890f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com pshufd xmm4, xmm4, 0 5891f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 4 5892f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jl l4b 5893f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 5894191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com cmp area, 128 // 128 pixels will not overflow 15 bits. 
5895191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com ja l4 5896191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5897c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com pshufd xmm5, xmm5, 0 // area 5898c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0 5899c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com psrld xmm6, 16 5900c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com cvtdq2ps xmm6, xmm6 5901c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com addps xmm5, xmm6 // (65536.0 + area - 1) 5902c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area 5903191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com cvtps2dq xmm5, xmm5 // 0.16 fixed point 5904c2a889eb5513143c9207c702429100562b4001f7fbarchard@google.com packssdw xmm5, xmm5 // 16 bit shorts 5905191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5906191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com // 4 pixel loop small blocks. 
5907191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com align 4 5908191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com s4: 5909191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com // top left 5910191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com movdqa xmm0, [eax] 5911191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com movdqa xmm1, [eax + 16] 5912191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com movdqa xmm2, [eax + 32] 5913191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com movdqa xmm3, [eax + 48] 5914191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5915191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com // - top right 5916191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm0, [eax + edx * 4] 5917191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm1, [eax + edx * 4 + 16] 5918191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm2, [eax + edx * 4 + 32] 5919191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm3, [eax + edx * 4 + 48] 5920191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com lea eax, [eax + 64] 5921191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5922191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com // - bottom left 5923191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm0, [esi] 5924191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm1, [esi + 16] 5925191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm2, [esi + 32] 5926191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com psubd xmm3, [esi + 48] 5927191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5928191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com // + bottom right 5929191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com paddd xmm0, [esi + edx * 4] 5930191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com paddd xmm1, 
[esi + edx * 4 + 16] 5931191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com paddd xmm2, [esi + edx * 4 + 32] 5932191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com paddd xmm3, [esi + edx * 4 + 48] 5933191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com lea esi, [esi + 64] 5934191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5935191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com packssdw xmm0, xmm1 // pack 4 pixels into 2 registers 5936191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com packssdw xmm2, xmm3 5937191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5938191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com pmulhuw xmm0, xmm5 5939191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com pmulhuw xmm2, xmm5 5940191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5941191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com packuswb xmm0, xmm2 5942191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com movdqu [edi], xmm0 5943191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com lea edi, [edi + 16] 5944191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com sub ecx, 4 5945191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com jge s4 5946191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5947191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com jmp l4b 5948191ab180736f78cd25989ec08007874946c85c77fbarchard@google.com 5949f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // 4 pixel loop 5950f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com align 4 5951f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l4: 5952f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // top left 5953f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm0, [eax] 5954f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm1, [eax + 16] 
5955f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm2, [eax + 32] 5956f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm3, [eax + 48] 5957f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 5958f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // - top right 5959f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm0, [eax + edx * 4] 5960f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm1, [eax + edx * 4 + 16] 5961f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm2, [eax + edx * 4 + 32] 5962f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm3, [eax + edx * 4 + 48] 5963f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea eax, [eax + 64] 5964f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 5965f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // - bottom left 5966f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm0, [esi] 5967f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm1, [esi + 16] 5968f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm2, [esi + 32] 5969f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm3, [esi + 48] 5970f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 5971f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // + bottom right 5972f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, [esi + edx * 4] 5973f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm1, [esi + edx * 4 + 16] 5974f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm2, [esi + edx * 4 + 32] 5975f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm3, [esi + edx * 4 + 48] 5976f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea esi, [esi + 64] 5977f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 
5978f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area 5979f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtdq2ps xmm1, xmm1 5980f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mulps xmm0, xmm4 5981f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mulps xmm1, xmm4 5982f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtdq2ps xmm2, xmm2 5983f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtdq2ps xmm3, xmm3 5984f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mulps xmm2, xmm4 5985f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mulps xmm3, xmm4 5986f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtps2dq xmm0, xmm0 5987f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtps2dq xmm1, xmm1 5988f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtps2dq xmm2, xmm2 5989f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtps2dq xmm3, xmm3 5990f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com packssdw xmm0, xmm1 5991f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com packssdw xmm2, xmm3 5992f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com packuswb xmm0, xmm2 5993f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqu [edi], xmm0 5994f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea edi, [edi + 16] 5995f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 4 5996f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jge l4 5997f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 5998f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l4b: 5999f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com add ecx, 4 - 1 6000f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jl l1b 6001f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 
6002f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // 1 pixel loop 6003f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com align 4 6004f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l1: 6005f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm0, [eax] 6006f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm0, [eax + edx * 4] 6007f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea eax, [eax + 16] 6008f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com psubd xmm0, [esi] 6009f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, [esi + edx * 4] 6010f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea esi, [esi + 16] 6011f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtdq2ps xmm0, xmm0 6012f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mulps xmm0, xmm4 6013f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com cvtps2dq xmm0, xmm0 6014f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com packssdw xmm0, xmm0 6015f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com packuswb xmm0, xmm0 6016f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movd dword ptr [edi], xmm0 6017f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea edi, [edi + 4] 6018f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 1 6019f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jge l1 6020f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l1b: 6021f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com } 6022f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com} 6023f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 6024f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6025f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 
6026f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// Creates a table of cumulative sums where each value is a sum of all values 6027f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com// above and to the left of the value. 6028f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.comvoid ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, 6029133adc46470722b24fdac30d7537d5009e61ef0cfbarchard@google.com const int32* previous_cumsum, int width) { 6030f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com __asm { 6031f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov eax, row 6032f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov edx, cumsum 6033f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov esi, previous_cumsum 6034f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com mov ecx, width 6035f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com pxor xmm0, xmm0 6036f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com pxor xmm1, xmm1 6037f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6038f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 4 6039f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jl l4b 6040f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com test edx, 15 6041f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jne l4b 6042f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6043f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // 4 pixel loop 6044f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com align 4 6045f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l4: 6046f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqu xmm2, [eax] // 4 argb pixels 16 bytes. 
6047f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea eax, [eax + 16] 6048f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm4, xmm2 6049f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6050f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpcklbw xmm2, xmm1 6051f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm3, xmm2 6052f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpcklwd xmm2, xmm1 6053f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpckhwd xmm3, xmm1 6054f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6055f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpckhbw xmm4, xmm1 6056f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa xmm5, xmm4 6057f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpcklwd xmm4, xmm1 6058f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com punpckhwd xmm5, xmm1 6059f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6060f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, xmm2 60619335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqa xmm2, [esi] // previous row above. 
6062f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm2, xmm0 6063f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6064f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, xmm3 60659335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqa xmm3, [esi + 16] 6066f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm3, xmm0 6067f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6068f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, xmm4 60699335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqa xmm4, [esi + 32] 6070f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm4, xmm0 6071f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6072f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, xmm5 60739335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqa xmm5, [esi + 48] 60749335518f4127167ee54b0872ab715c674be06005fbarchard@google.com lea esi, [esi + 64] 6075f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm5, xmm0 6076f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6077f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa [edx], xmm2 6078f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa [edx + 16], xmm3 6079f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa [edx + 32], xmm4 6080f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqa [edx + 48], xmm5 6081f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6082f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea edx, [edx + 64] 6083f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 4 6084f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jge l4 6085f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6086f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l4b: 
6087f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com add ecx, 4 - 1 6088f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jl l1b 6089f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6090f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com // 1 pixel loop 6091f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com align 4 6092f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l1: 6093f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. 6094f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea eax, [eax + 4] 6095f38aefef4b66dc8ebe77ff37234be332731d47f6fbarchard@google.com punpcklbw xmm2, xmm1 6096f38aefef4b66dc8ebe77ff37234be332731d47f6fbarchard@google.com punpcklwd xmm2, xmm1 6097f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm0, xmm2 60989335518f4127167ee54b0872ab715c674be06005fbarchard@google.com movdqu xmm2, [esi] 60999335518f4127167ee54b0872ab715c674be06005fbarchard@google.com lea esi, [esi + 16] 6100f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com paddd xmm2, xmm0 6101f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com movdqu [edx], xmm2 6102f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com lea edx, [edx + 16] 6103f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com sub ecx, 1 6104f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com jge l1 6105f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6106f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com l1b: 6107f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com } 6108f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com} 6109f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 6110f51e87912eebc959ac6b9d1ab44978e0e056ca74fbarchard@google.com 6111864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com#ifdef HAS_ARGBAFFINEROW_SSE2 
6112864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com// Copy ARGB pixels from source image with slope to a row of destination. 6113864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com__declspec(naked) __declspec(align(16)) 6114fc7314e86bc7a1a88b38b815e881183521801ea9fbarchard@google.comLIBYUV_API 6115864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.comvoid ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, 6116864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com uint8* dst_argb, const float* uv_dudv, int width) { 6117864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com __asm { 6118864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com push esi 6119e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com push edi 6120a0630d77f0433f77eba221854b017d6c8bc5229afbarchard@google.com mov eax, [esp + 12] // src_argb 6121e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com mov esi, [esp + 16] // stride 6122e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com mov edx, [esp + 20] // dst_argb 6123e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com mov ecx, [esp + 24] // pointer to uv_dudv 6124864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com movq xmm2, qword ptr [ecx] // uv 6125845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movq xmm7, qword ptr [ecx + 8] // dudv 6126e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com mov ecx, [esp + 28] // width 6127864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com shl esi, 16 // 4, stride 6128864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com add esi, 4 6129845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movd xmm5, esi 6130845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com sub ecx, 4 6131845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com jl l4b 6132864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com 
6133845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com // setup for 4 pixel loop 6134845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pshufd xmm7, xmm7, 0x44 // dup dudv 6135845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pshufd xmm5, xmm5, 0 // dup 4, stride 6136864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com movdqa xmm0, xmm2 // x0, y0, x1, y1 6137845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com addps xmm0, xmm7 6138864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com movlhps xmm2, xmm0 6139845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movdqa xmm4, xmm7 6140845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com addps xmm4, xmm4 // dudv *= 2 6141845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movdqa xmm3, xmm2 // x2, y2, x3, y3 6142845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com addps xmm3, xmm4 6143845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com addps xmm4, xmm4 // dudv *= 4 61442d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 6145845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com // 4 pixel loop 6146864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com align 4 6147845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com l4: 6148845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com cvttps2dq xmm0, xmm2 // x, y float to int first 2 6149845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com cvttps2dq xmm1, xmm3 // x, y float to int next 2 6150845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com packssdw xmm0, xmm1 // x, y as 8 shorts 6151845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride. 
6152845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movd esi, xmm0 6153845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pshufd xmm0, xmm0, 0x39 // shift right 6154e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movd edi, xmm0 6155845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pshufd xmm0, xmm0, 0x39 // shift right 6156e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movd xmm1, [eax + esi] // read pixel 0 6157e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movd xmm6, [eax + edi] // read pixel 1 6158845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com punpckldq xmm1, xmm6 // combine pixel 0 and 1 6159e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com addps xmm2, xmm4 // x, y += dx, dy first 2 6160e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movq qword ptr [edx], xmm1 6161845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movd esi, xmm0 6162845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pshufd xmm0, xmm0, 0x39 // shift right 6163e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movd edi, xmm0 6164845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movd xmm6, [eax + esi] // read pixel 2 6165e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movd xmm0, [eax + edi] // read pixel 3 6166845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com punpckldq xmm6, xmm0 // combine pixel 2 and 3 6167e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com addps xmm3, xmm4 // x, y += dx, dy next 2 6168845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com sub ecx, 4 6169e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com movq qword ptr 8[edx], xmm6 6170845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com lea edx, [edx + 16] 6171845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com jge l4 6172864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com 
6173845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com l4b: 6174845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com add ecx, 4 - 1 6175864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com jl l1b 6176864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com 6177864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com // 1 pixel loop 6178864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com align 4 6179864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com l1: 6180845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com cvttps2dq xmm0, xmm2 // x, y float to int 6181845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com packssdw xmm0, xmm0 // x, y as shorts 6182845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride 6183845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com addps xmm2, xmm7 // x, y += dx, dy 6184845e94d1a74dc4b773159bf91d3c5da23b781476fbarchard@google.com movd esi, xmm0 6185864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com movd xmm0, [eax + esi] // copy a pixel 6186864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com sub ecx, 1 6187864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com movd [edx], xmm0 6188864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com lea edx, [edx + 4] 6189864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com jge l1 6190864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com l1b: 6191e3cc76943e5d1963443ed323c0abd35bafeba945fbarchard@google.com pop edi 6192864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com pop esi 6193864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com ret 6194864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com } 6195864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com} 6196864f828a0167bde25b0d24d6b865aa514919fcc9fbarchard@google.com#endif // HAS_ARGBAFFINEROW_SSE2 
#ifdef HAS_INTERPOLATEROW_AVX2
// Vertical bilinear blend of two source rows into one destination row,
// 32 bytes per iteration (32x2 -> 32x1).
//
//   dst_ptr           - destination row.
//   src_ptr           - first source row; the second is src_ptr + src_stride.
//   src_stride        - byte distance between the two source rows.
//   dst_width         - number of bytes to produce.  Except for the plain
//                       copy path, output is written in whole 32 byte chunks.
//   source_y_fraction - weight of the second row, 0..255.  It is halved to a
//                       7 bit fraction f, giving (row0 * (128 - f) +
//                       row1 * f) >> 7.
//
// Fractions that halve to 0, 32, 64 and 96 take shortcut paths built from a
// copy or from pavgb.
__declspec(naked) __declspec(align(16))
void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) {
  __asm {
    push       esi
    push       edi
    mov        edi, [esp + 8 + 4]   // dst_ptr
    mov        esi, [esp + 8 + 8]   // src_ptr
    mov        edx, [esp + 8 + 12]  // src_stride
    mov        ecx, [esp + 8 + 16]  // dst_width
    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
    shr        eax, 1               // f = fraction / 2
    // Pick a specialized blend when one applies.
    test       eax, eax
    je         copy_row      // f == 0: 100% row0.  edi is still dst_ptr.
    sub        edi, esi      // edi = dst - src, so [esi + edi] is dst.
    cmp        eax, 32
    je         blend_75_25   // f == 32: 75% row0, 25% row1.
    cmp        eax, 64
    je         blend_50_50   // f == 64: 50% row0, 50% row1.
    cmp        eax, 96
    je         blend_25_75   // f == 96: 25% row0, 75% row1.

    // General case: broadcast the byte pair (128 - f, f) to all of ymm5.
    vmovd      xmm0, eax     // f, weight of row1
    neg        eax
    add        eax, 128
    vmovd      xmm5, eax     // 128 - f, weight of row0
    vpunpcklbw xmm5, xmm5, xmm0
    vpunpcklwd xmm5, xmm5, xmm5
    vpxor      ymm0, ymm0, ymm0  // all-zero indices
    vpermd     ymm5, ymm0, ymm5  // replicate dword 0 across ymm5

    align      4
  blend_general:
    vmovdqu    ymm0, [esi]            // row0
    vmovdqu    ymm2, [esi + edx]      // row1
    vpunpckhbw ymm1, ymm0, ymm2       // interleave row0/row1 (per lane)
    vpunpcklbw ymm0, ymm0, ymm2
    vpmaddubsw ymm0, ymm0, ymm5       // row0 * (128 - f) + row1 * f
    vpmaddubsw ymm1, ymm1, ymm5
    vpsrlw     ymm0, ymm0, 7
    vpsrlw     ymm1, ymm1, 7
    vpackuswb  ymm0, ymm0, ymm1       // pack; restores the byte order
    vmovdqu    [esi + edi], ymm0
    lea        esi, [esi + 32]        // lea leaves the flags alone
    sub        ecx, 32
    jg         blend_general
    jmp        done

    // 25% row0, 75% row1: average with row1 twice.
    align      4
  blend_25_75:
    vmovdqu    ymm0, [esi]
    vpavgb     ymm0, ymm0, [esi + edx]
    vpavgb     ymm0, ymm0, [esi + edx]
    vmovdqu    [esi + edi], ymm0
    lea        esi, [esi + 32]
    sub        ecx, 32
    jg         blend_25_75
    jmp        done

    // 50% row0, 50% row1: a single average.
    align      4
  blend_50_50:
    vmovdqu    ymm0, [esi]
    vpavgb     ymm0, ymm0, [esi + edx]
    vmovdqu    [esi + edi], ymm0
    lea        esi, [esi + 32]
    sub        ecx, 32
    jg         blend_50_50
    jmp        done

    // 75% row0, 25% row1: start from row1 and average with row0 twice.
    align      4
  blend_75_25:
    vmovdqu    ymm0, [esi + edx]
    vpavgb     ymm0, ymm0, [esi]
    vpavgb     ymm0, ymm0, [esi]
    vmovdqu    [esi + edi], ymm0
    lea        esi, [esi + 32]
    sub        ecx, 32
    jg         blend_75_25
    jmp        done

    // 100% row0: copy dst_width bytes from src_ptr to dst_ptr.
    align      4
  copy_row:
    rep movsb

  done:
    pop        edi
    pop        esi
    vzeroupper
    ret
  }
}
#endif  // HAS_INTERPOLATEROW_AVX2

62992154de414c1ac41fb434348964c728ccc077ffd3fbarchard@google.com#ifdef HAS_INTERPOLATEROW_SSSE3 6300b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 63019bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com__declspec(naked) __declspec(align(16)) 6302b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 6303b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com ptrdiff_t src_stride, int dst_width, 6304b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int source_y_fraction) { 63059bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com __asm { 63069bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com push esi 63079bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com push edi 6308b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov edi, [esp + 8 + 4] // dst_ptr 6309b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov esi, [esp + 8 + 8] // src_ptr 63109bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com mov edx, [esp + 8 + 12] // src_stride 63119bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com mov ecx, [esp + 8 + 16] // dst_width 63129bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 63139bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com sub edi, esi 63149bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com shr eax, 1 63158c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com // Dispatch to specialized filters if applicable. 63168c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com cmp eax, 0 63178c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop100 // 0 / 128. Blend 100 / 0. 6318b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com cmp eax, 32 63198c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop75 // 32 / 128 is 0.25. Blend 75 / 25. 
63209bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com cmp eax, 64 63218c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop50 // 64 / 128 is 0.50. Blend 50 / 50. 6322b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com cmp eax, 96 63238c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop25 // 96 / 128 is 0.75. Blend 25 / 75. 6324b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 63259bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movd xmm0, eax // high fraction 0..127 63269bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com neg eax 63279bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com add eax, 128 63289bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movd xmm5, eax // low fraction 128..1 63299bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com punpcklbw xmm5, xmm0 63309bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com punpcklwd xmm5, xmm5 63319bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com pshufd xmm5, xmm5, 0 63329bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6333c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 63349bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com xloop: 63359bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa xmm0, [esi] 63369bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa xmm2, [esi + edx] 63379bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa xmm1, xmm0 63389bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com punpcklbw xmm0, xmm2 63399bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com punpckhbw xmm1, xmm2 63409bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com pmaddubsw xmm0, xmm5 63419bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com pmaddubsw xmm1, xmm5 63429bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com psrlw xmm0, 7 
63439bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com psrlw xmm1, 7 63449bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com packuswb xmm0, xmm1 6345b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 63469bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa [esi + edi], xmm0 63479bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com lea esi, [esi + 16] 63489bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com jg xloop 6349b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jmp xloop99 63509bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6351b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 25 / 75. 6352c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6353b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com xloop25: 6354b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa xmm0, [esi] 6355b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa xmm1, [esi + edx] 6356b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com pavgb xmm0, xmm1 6357b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com pavgb xmm0, xmm1 6358b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6359b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa [esi + edi], xmm0 6360b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com lea esi, [esi + 16] 6361b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jg xloop25 6362b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jmp xloop99 63639bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6364b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 50 / 50. 
6365c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6366b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com xloop50: 63679bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa xmm0, [esi] 6368b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa xmm1, [esi + edx] 6369b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com pavgb xmm0, xmm1 6370b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 63719bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa [esi + edi], xmm0 63729bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com lea esi, [esi + 16] 6373b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jg xloop50 6374b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jmp xloop99 63759bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6376b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 75 / 25. 6377c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6378b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com xloop75: 6379b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa xmm1, [esi] 6380b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa xmm0, [esi + edx] 6381b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com pavgb xmm0, xmm1 6382b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com pavgb xmm0, xmm1 6383b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6384b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com movdqa [esi + edi], xmm0 6385b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com lea esi, [esi + 16] 6386b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jg xloop75 6387b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jmp xloop99 63889bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6389b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 100 / 0 - Copy 
row unchanged. 6390c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6391b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com xloop100: 63929bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa xmm0, [esi] 6393b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 63949bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com movdqa [esi + edi], xmm0 63959bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com lea esi, [esi + 16] 6396b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com jg xloop100 63979bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 63988811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop99: 63998811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pop edi 64008811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pop esi 64018811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com ret 64028811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com } 64038811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com} 64042154de414c1ac41fb434348964c728ccc077ffd3fbarchard@google.com#endif // HAS_INTERPOLATEROW_SSSE3 64058811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 640697c96261076adb3294105db38b461bcfae9597d3fbarchard@google.com#ifdef HAS_INTERPOLATEROW_SSE2 6407b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 64088811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com__declspec(naked) __declspec(align(16)) 6409b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, 6410b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com ptrdiff_t src_stride, int dst_width, 6411b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int source_y_fraction) { 64128811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com __asm { 64138811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com push esi 
64148811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com push edi 6415b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov edi, [esp + 8 + 4] // dst_ptr 6416b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov esi, [esp + 8 + 8] // src_ptr 64178811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com mov edx, [esp + 8 + 12] // src_stride 64188811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com mov ecx, [esp + 8 + 16] // dst_width 64198811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 64208811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com sub edi, esi 64218c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com // Dispatch to specialized filters if applicable. 64228c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com cmp eax, 0 64238c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop100 // 0 / 256. Blend 100 / 0. 64248811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com cmp eax, 64 64258c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop75 // 64 / 256 is 0.25. Blend 75 / 25. 64268811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com cmp eax, 128 64278c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. 64288811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com cmp eax, 192 64298c4e5e284c320cd9b9bf137ba1bd10a88b398b48fbarchard@google.com je xloop25 // 192 / 256 is 0.75. Blend 25 / 75. 
64308811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 64318811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movd xmm5, eax // xmm5 = y fraction 64328811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpcklbw xmm5, xmm5 64338811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com psrlw xmm5, 1 64348811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpcklwd xmm5, xmm5 64358811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpckldq xmm5, xmm5 64368811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpcklqdq xmm5, xmm5 64378811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pxor xmm4, xmm4 64388811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 6439c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 64408811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop: 64418811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm0, [esi] // row0 64428811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm2, [esi + edx] // row1 64438811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm1, xmm0 64448811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm3, xmm2 64458811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpcklbw xmm2, xmm4 64468811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpckhbw xmm3, xmm4 64478811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpcklbw xmm0, xmm4 64488811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com punpckhbw xmm1, xmm4 64498811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com psubw xmm2, xmm0 // row1 - row0 64508811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com psubw xmm3, xmm1 64518811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16 64528811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com paddw xmm3, xmm3 
64538811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pmulhw xmm2, xmm5 // scale diff 64548811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pmulhw xmm3, xmm5 64558811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com paddw xmm0, xmm2 // sum rows 64568811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com paddw xmm1, xmm3 64578811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com packuswb xmm0, xmm1 6458b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 64598811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa [esi + edi], xmm0 64608811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com lea esi, [esi + 16] 64618811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jg xloop 64628811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jmp xloop99 64638811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 64648811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com // Blend 25 / 75. 6465c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 64668811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop25: 64678811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm0, [esi] 64688811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm1, [esi + edx] 64698811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pavgb xmm0, xmm1 64708811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pavgb xmm0, xmm1 6471b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 64728811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa [esi + edi], xmm0 64738811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com lea esi, [esi + 16] 64748811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jg xloop25 64758811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jmp xloop99 64768811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 
64778811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com // Blend 50 / 50. 6478c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 64798811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop50: 64808811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm0, [esi] 64818811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm1, [esi + edx] 64828811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pavgb xmm0, xmm1 6483b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 64848811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa [esi + edi], xmm0 64858811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com lea esi, [esi + 16] 64868811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jg xloop50 64878811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jmp xloop99 64888811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 64898811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com // Blend 75 / 25. 
6490c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 64918811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop75: 64928811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm1, [esi] 64938811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm0, [esi + edx] 64948811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pavgb xmm0, xmm1 64958811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com pavgb xmm0, xmm1 6496b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 64978811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa [esi + edi], xmm0 64988811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com lea esi, [esi + 16] 64998811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jg xloop75 65008811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jmp xloop99 65018811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 65028811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com // Blend 100 / 0 - Copy row unchanged. 
6503c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 65048811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com xloop100: 65058811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa xmm0, [esi] 6506b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 65078811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com movdqa [esi + edi], xmm0 65088811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com lea esi, [esi + 16] 65098811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com jg xloop100 65108811289be7ad257f16a6532aeec2cb5db331fa54fbarchard@google.com 6511b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com xloop99: 65129bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com pop edi 65139bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com pop esi 65149bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com ret 65159bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com } 65169bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com} 651797c96261076adb3294105db38b461bcfae9597d3fbarchard@google.com#endif // HAS_INTERPOLATEROW_SSE2 65189bcc9a25355841f844e9fae3ba40522447312a66fbarchard@google.com 6519b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 6520cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com__declspec(naked) __declspec(align(16)) 6521b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 6522b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com ptrdiff_t src_stride, int dst_width, 6523b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int source_y_fraction) { 6524cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com __asm { 6525cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com push esi 6526cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com push edi 
6527b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov edi, [esp + 8 + 4] // dst_ptr 6528b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov esi, [esp + 8 + 8] // src_ptr 6529cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov edx, [esp + 8 + 12] // src_stride 6530cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov ecx, [esp + 8 + 16] // dst_width 6531cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 6532cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com sub edi, esi 6533cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com shr eax, 1 6534cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Dispatch to specialized filters if applicable. 6535cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 0 6536cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop100 // 0 / 128. Blend 100 / 0. 6537cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 32 6538cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop75 // 32 / 128 is 0.25. Blend 75 / 25. 6539cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 64 6540cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop50 // 64 / 128 is 0.50. Blend 50 / 50. 6541cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 96 6542cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop25 // 96 / 128 is 0.75. Blend 25 / 75. 
6543cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6544cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movd xmm0, eax // high fraction 0..127 6545cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com neg eax 6546cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com add eax, 128 6547cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movd xmm5, eax // low fraction 128..1 6548cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklbw xmm5, xmm0 6549cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklwd xmm5, xmm5 6550cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pshufd xmm5, xmm5, 0 6551cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6552c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6553cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop: 6554cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6555cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm2, [esi + edx] 6556cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, xmm0 6557cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklbw xmm0, xmm2 6558cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpckhbw xmm1, xmm2 6559cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pmaddubsw xmm0, xmm5 6560cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pmaddubsw xmm1, xmm5 6561cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com psrlw xmm0, 7 6562cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com psrlw xmm1, 7 6563cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com packuswb xmm0, xmm1 6564b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6565cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6566cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 
6567cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop 6568cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6569cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6570cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 25 / 75. 6571c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6572cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop25: 6573cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6574cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi + edx] 6575cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6576cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6577b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6578cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6579cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6580cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop25 6581cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6582cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6583cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 50 / 50. 
6584c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6585cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop50: 6586cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6587cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi + edx] 6588cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6589b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6590cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6591cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6592cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop50 6593cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6594cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6595cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 75 / 25. 6596c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6597cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop75: 6598cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi] 6599cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi + edx] 6600cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6601cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6602b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6603cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6604cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6605cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop75 6606cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6607cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6608cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 100 / 0 - Copy 
row unchanged. 6609c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6610cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop100: 6611cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6612b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6613cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6614cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6615cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop100 6616cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6617cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop99: 6618cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pop edi 6619cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pop esi 6620cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com ret 6621cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com } 6622cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com} 6623cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 662497c96261076adb3294105db38b461bcfae9597d3fbarchard@google.com#ifdef HAS_INTERPOLATEROW_SSE2 6625b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 6626cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com__declspec(naked) __declspec(align(16)) 6627b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, 6628b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com ptrdiff_t src_stride, int dst_width, 6629b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int source_y_fraction) { 6630cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com __asm { 6631cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com push esi 6632cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com push edi 
6633b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov edi, [esp + 8 + 4] // dst_ptr 6634b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com mov esi, [esp + 8 + 8] // src_ptr 6635cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov edx, [esp + 8 + 12] // src_stride 6636cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov ecx, [esp + 8 + 16] // dst_width 6637cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 6638cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com sub edi, esi 6639cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Dispatch to specialized filters if applicable. 6640cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 0 6641cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop100 // 0 / 256. Blend 100 / 0. 6642cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 64 6643cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop75 // 64 / 256 is 0.25. Blend 75 / 25. 6644cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 128 6645cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. 6646cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com cmp eax, 192 6647cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com je xloop25 // 192 / 256 is 0.75. Blend 25 / 75. 
6648cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6649cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movd xmm5, eax // xmm5 = y fraction 6650cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklbw xmm5, xmm5 6651cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com psrlw xmm5, 1 6652cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklwd xmm5, xmm5 6653cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpckldq xmm5, xmm5 6654cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklqdq xmm5, xmm5 6655cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pxor xmm4, xmm4 6656cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6657c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6658cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop: 6659cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] // row0 6660cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm2, [esi + edx] // row1 6661cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, xmm0 6662cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm3, xmm2 6663cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklbw xmm2, xmm4 6664cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpckhbw xmm3, xmm4 6665cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpcklbw xmm0, xmm4 6666cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com punpckhbw xmm1, xmm4 6667cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com psubw xmm2, xmm0 // row1 - row0 6668cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com psubw xmm3, xmm1 6669cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16 6670cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com paddw xmm3, xmm3 
6671cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pmulhw xmm2, xmm5 // scale diff 6672cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pmulhw xmm3, xmm5 6673cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com paddw xmm0, xmm2 // sum rows 6674cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com paddw xmm1, xmm3 6675cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com packuswb xmm0, xmm1 6676b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6677cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6678cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6679cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop 6680cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6681cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6682cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 25 / 75. 6683c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6684cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop25: 6685cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6686cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi + edx] 6687cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6688cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6689b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6690cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6691cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6692cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop25 6693cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6694cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 
6695cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 50 / 50. 6696c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6697cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop50: 6698cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6699cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi + edx] 6700cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6701b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6702cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6703cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6704cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop50 6705cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6706cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6707cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 75 / 25. 
6708c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6709cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop75: 6710cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm1, [esi] 6711cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi + edx] 6712cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6713cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pavgb xmm0, xmm1 6714b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6715cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6716cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6717cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop75 6718cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jmp xloop99 6719cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6720cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com // Blend 100 / 0 - Copy row unchanged. 
6721c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6722cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop100: 6723cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu xmm0, [esi] 6724b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com sub ecx, 16 6725cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com movdqu [esi + edi], xmm0 6726cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com lea esi, [esi + 16] 6727cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com jg xloop100 6728cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6729cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com xloop99: 6730cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pop edi 6731cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com pop esi 6732cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com ret 6733cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com } 6734cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com} 673597c96261076adb3294105db38b461bcfae9597d3fbarchard@google.com#endif // HAS_INTERPOLATEROW_SSE2 6736cd6056c01cec8de0431390933537c9b8458bd472fbarchard@google.com 6737e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com__declspec(naked) __declspec(align(16)) 6738e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.comvoid HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, 6739e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com uint8* dst_uv, int pix) { 6740e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com __asm { 6741e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com push edi 6742e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com mov eax, [esp + 4 + 4] // src_uv 6743e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com mov edx, [esp + 4 + 8] // src_uv_stride 6744e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 
6745e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 6746e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com sub edi, eax 6747e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com 6748c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6749e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com convertloop: 6750e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com movdqa xmm0, [eax] 6751e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com pavgb xmm0, [eax + edx] 6752e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com sub ecx, 16 6753e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com movdqa [eax + edi], xmm0 6754e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com lea eax, [eax + 16] 6755e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com jg convertloop 6756e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com pop edi 6757e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com ret 6758e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com } 6759e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com} 6760e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com 6761e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com#ifdef HAS_HALFROW_AVX2 6762e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com__declspec(naked) __declspec(align(16)) 6763e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.comvoid HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, 6764e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com uint8* dst_uv, int pix) { 6765e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com __asm { 6766e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com push edi 6767e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com mov eax, [esp + 4 + 4] // src_uv 6768e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com mov edx, [esp + 4 + 8] // src_uv_stride 
6769e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com mov edi, [esp + 4 + 12] // dst_v 6770e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com mov ecx, [esp + 4 + 16] // pix 6771e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com sub edi, eax 6772e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com 6773c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6774e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com convertloop: 6775e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com vmovdqu ymm0, [eax] 6776e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com vpavgb ymm0, ymm0, [eax + edx] 6777e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com sub ecx, 32 6778e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com vmovdqu [eax + edi], ymm0 6779e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com lea eax, [eax + 32] 6780e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com jg convertloop 67819b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com 6782e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com pop edi 67839b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 6784e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com ret 6785e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com } 6786e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com} 6787e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com#endif // HAS_HALFROW_AVX2 6788e1bb5d94302dfa31c305bd8d0e3083a70cac5d77fbarchard@google.com 67898d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com__declspec(naked) __declspec(align(16)) 67901096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, 67911096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com uint32 selector, int pix) { 67928d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com __asm { 
67938d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com mov eax, [esp + 4] // src_argb 67948d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com mov edx, [esp + 8] // dst_bayer 67958d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com movd xmm5, [esp + 12] // selector 67968d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com mov ecx, [esp + 16] // pix 67978d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com pshufd xmm5, xmm5, 0 67988d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com 6799c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 68008d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com wloop: 68018d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com movdqa xmm0, [eax] 6802e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com movdqa xmm1, [eax + 16] 6803e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com lea eax, [eax + 32] 68048d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com pshufb xmm0, xmm5 6805e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com pshufb xmm1, xmm5 6806e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com punpckldq xmm0, xmm1 6807e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com sub ecx, 8 6808e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com movq qword ptr [edx], xmm0 6809e8df16bd7c44e58ea925c51ea82a34144ada3956fbarchard@google.com lea edx, [edx + 8] 68108d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com jg wloop 68118d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com ret 68128d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com } 68138d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com} 68148d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com 681508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com// Specialized ARGB to Bayer that just isolates G channel. 
681608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com__declspec(naked) __declspec(align(16)) 681708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.comvoid ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, 681808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com uint32 selector, int pix) { 681908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com __asm { 682008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com mov eax, [esp + 4] // src_argb 682108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com mov edx, [esp + 8] // dst_bayer 682208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com // selector 682308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com mov ecx, [esp + 16] // pix 682408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com pcmpeqb xmm5, xmm5 // generate mask 0x000000ff 682508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com psrld xmm5, 24 682608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com 6827c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 682808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com wloop: 682908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com movdqa xmm0, [eax] 683008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com movdqa xmm1, [eax + 16] 683108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com lea eax, [eax + 32] 683208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com psrld xmm0, 8 // Move green to bottom. 
683308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com psrld xmm1, 8 683408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com pand xmm0, xmm5 683508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com pand xmm1, xmm5 68364c736098d6c10a9b4f407b5350f8a0ba5848c22dfbarchard@google.com packssdw xmm0, xmm1 683708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com packuswb xmm0, xmm1 683808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com sub ecx, 8 683908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com movq qword ptr [edx], xmm0 684008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com lea edx, [edx + 8] 684108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com jg wloop 684208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com ret 684308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com } 684408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com} 684508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com 68461096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
68471096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com__declspec(naked) __declspec(align(16)) 68481096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 68491096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com const uint8* shuffler, int pix) { 68501096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com __asm { 68511096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov eax, [esp + 4] // src_argb 6852212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov edx, [esp + 8] // dst_argb 68531096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 12] // shuffler 68541096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa xmm5, [ecx] 68551096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 16] // pix 68561096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 6857c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 68581096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com wloop: 68591096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa xmm0, [eax] 68601096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa xmm1, [eax + 16] 68611096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea eax, [eax + 32] 68621096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com pshufb xmm0, xmm5 68631096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com pshufb xmm1, xmm5 68641096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com sub ecx, 8 68651096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa [edx], xmm0 68661096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa [edx + 16], xmm1 68671096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea edx, [edx + 32] 68681096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com jg wloop 68691096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com ret 
68701096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com } 68711096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com} 68721096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 68731096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com__declspec(naked) __declspec(align(16)) 68741096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, 68751096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com const uint8* shuffler, int pix) { 68761096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com __asm { 68771096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov eax, [esp + 4] // src_argb 6878212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov edx, [esp + 8] // dst_argb 68791096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 12] // shuffler 68801096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqa xmm5, [ecx] 68811096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 16] // pix 68821096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 6883c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 68841096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com wloop: 68851096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqu xmm0, [eax] 68861096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqu xmm1, [eax + 16] 68871096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea eax, [eax + 32] 68881096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com pshufb xmm0, xmm5 68891096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com pshufb xmm1, xmm5 68901096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com sub ecx, 8 68911096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqu [edx], xmm0 68921096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com movdqu [edx + 16], xmm1 
68931096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea edx, [edx + 32] 68941096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com jg wloop 68951096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com ret 68961096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com } 68971096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com} 68981096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 68991096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com#ifdef HAS_ARGBSHUFFLEROW_AVX2 69001096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com__declspec(naked) __declspec(align(16)) 69011096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, 69021096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com const uint8* shuffler, int pix) { 69031096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com __asm { 69041096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov eax, [esp + 4] // src_argb 6905212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov edx, [esp + 8] // dst_argb 69061096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 12] // shuffler 6907446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. 
69081096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com mov ecx, [esp + 16] // pix 69091096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 6910c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 69111096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com wloop: 69121096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vmovdqu ymm0, [eax] 69131096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vmovdqu ymm1, [eax + 32] 69141096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea eax, [eax + 64] 69151096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vpshufb ymm0, ymm0, ymm5 69161096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vpshufb ymm1, ymm1, ymm5 69171096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com sub ecx, 16 69181096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vmovdqu [edx], ymm0 69191096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com vmovdqu [edx + 32], ymm1 69201096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com lea edx, [edx + 64] 69211096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com jg wloop 69229b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com 69239b4c00b908d37727c6caf82337813d567732be1cfbarchard@google.com vzeroupper 69241096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com ret 69251096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com } 69261096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com} 69278b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com#endif // HAS_ARGBSHUFFLEROW_AVX2 69281096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 6929212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com__declspec(naked) __declspec(align(16)) 6930212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.comvoid ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, 6931212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com const uint8* shuffler, int pix) { 
6932212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com __asm { 6933212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com push ebx 6934212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com push esi 6935212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov eax, [esp + 8 + 4] // src_argb 6936212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov edx, [esp + 8 + 8] // dst_argb 6937212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov esi, [esp + 8 + 12] // shuffler 6938212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov ecx, [esp + 8 + 16] // pix 69398b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com pxor xmm5, xmm5 6940212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 6941212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov ebx, [esi] // shuffler 6942212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com cmp ebx, 0x03000102 6943212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com je shuf_3012 6944212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com cmp ebx, 0x00010203 6945212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com je shuf_0123 6946212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com cmp ebx, 0x00030201 6947212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com je shuf_0321 6948212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com cmp ebx, 0x02010003 6949212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com je shuf_2103 6950212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 6951212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com // TODO(fbarchard): Use one source pointer and 3 offsets. 
6952212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf_any1: 6953212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [esi] 6954212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [eax + ebx] 6955212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov [edx], bl 6956212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [esi + 1] 6957212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [eax + ebx] 6958212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov [edx + 1], bl 6959212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [esi + 2] 6960212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [eax + ebx] 6961212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov [edx + 2], bl 6962212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [esi + 3] 6963212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movzx ebx, byte ptr [eax + ebx] 6964212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com mov [edx + 3], bl 6965212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea eax, [eax + 4] 6966212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea edx, [edx + 4] 6967212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com sub ecx, 1 6968212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jg shuf_any1 6969212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jmp shuf99 6970212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 6971c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6972212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf_0123: 6973212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu xmm0, [eax] 6974212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea eax, [eax + 16] 
6975212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqa xmm1, xmm0 69768b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpcklbw xmm0, xmm5 69778b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpckhbw xmm1, xmm5 6978212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB 6979212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm0, xmm0, 01Bh 6980212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm1, xmm1, 01Bh 6981212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm1, xmm1, 01Bh 6982212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com packuswb xmm0, xmm1 6983212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com sub ecx, 4 6984212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu [edx], xmm0 6985212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea edx, [edx + 16] 6986212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jg shuf_0123 6987212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jmp shuf99 6988212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 6989c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 6990212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf_0321: 6991212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu xmm0, [eax] 6992212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea eax, [eax + 16] 6993212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqa xmm1, xmm0 69948b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpcklbw xmm0, xmm5 69958b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpckhbw xmm1, xmm5 6996212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB 6997212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm0, xmm0, 039h 
6998212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm1, xmm1, 039h 6999212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm1, xmm1, 039h 7000212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com packuswb xmm0, xmm1 7001212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com sub ecx, 4 7002212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu [edx], xmm0 7003212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea edx, [edx + 16] 7004212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jg shuf_0321 7005212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jmp shuf99 7006212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 7007c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 7008212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf_2103: 7009212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu xmm0, [eax] 7010212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea eax, [eax + 16] 7011212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqa xmm1, xmm0 70128b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpcklbw xmm0, xmm5 70138b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpckhbw xmm1, xmm5 7014212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA 7015212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm0, xmm0, 093h 7016212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm1, xmm1, 093h 7017212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm1, xmm1, 093h 7018212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com packuswb xmm0, xmm1 7019212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com sub ecx, 4 7020212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu [edx], xmm0 
7021212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea edx, [edx + 16] 7022212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jg shuf_2103 7023212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jmp shuf99 7024212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 7025c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 7026212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf_3012: 7027212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu xmm0, [eax] 7028212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea eax, [eax + 16] 7029212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqa xmm1, xmm0 70308b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpcklbw xmm0, xmm5 70318b0cdb4a6e3bc468b1901dcfff7acc93bbb6a981fbarchard@google.com punpckhbw xmm1, xmm5 7032212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB 7033212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm0, xmm0, 0C6h 7034212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshufhw xmm1, xmm1, 0C6h 7035212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pshuflw xmm1, xmm1, 0C6h 7036212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com packuswb xmm0, xmm1 7037212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com sub ecx, 4 7038212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com movdqu [edx], xmm0 7039212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com lea edx, [edx + 16] 7040212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com jg shuf_3012 7041212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 7042212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com shuf99: 7043212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pop esi 7044212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com pop ebx 
7045212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com ret 7046212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com } 7047212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com} 7048212a1a5000dbdd6bcff7ec355fa2bfa6a00183f8fbarchard@google.com 70499de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com// YUY2 - Macro-pixel = 2 image pixels 70509de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... 70519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 70529de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com// UYVY - Macro-pixel = 2 image pixels 70539de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com// U0Y0V0Y1 70549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 70559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com__declspec(naked) __declspec(align(16)) 70569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToYUY2Row_SSE2(const uint8* src_y, 70579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 70589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 70599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_frame, int width) { 70609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com __asm { 70619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com push esi 70629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com push edi 70639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov eax, [esp + 8 + 4] // src_y 70649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov esi, [esp + 8 + 8] // src_u 70659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov edx, [esp + 8 + 12] // src_v 70669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov edi, [esp + 8 + 16] // dst_frame 70679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov ecx, [esp + 8 + 20] // width 
70689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com sub edx, esi 70699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 7070c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 70719de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com convertloop: 70729de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com movq xmm2, qword ptr [esi] // U 70739de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com movq xmm3, qword ptr [esi + edx] // V 70749de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea esi, [esi + 8] 70759de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpcklbw xmm2, xmm3 // UV 7076f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu xmm0, [eax] // Y 70779de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea eax, [eax + 16] 707855c20a861e3a503839fd2007b302bc20c11d9460fbarchard@google.com movdqa xmm1, xmm0 70799de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpcklbw xmm0, xmm2 // YUYV 70809de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpckhbw xmm1, xmm2 7081f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu [edi], xmm0 7082f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu [edi + 16], xmm1 70839de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea edi, [edi + 32] 70849de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com sub ecx, 16 70859de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com jg convertloop 70869de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 70879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com pop edi 70889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com pop esi 70899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ret 70909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com } 70919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 70929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 
70939de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com__declspec(naked) __declspec(align(16)) 70949de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToUYVYRow_SSE2(const uint8* src_y, 70959de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 70969de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 70979de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_frame, int width) { 70989de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com __asm { 70999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com push esi 71009de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com push edi 71019de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov eax, [esp + 8 + 4] // src_y 71029de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov esi, [esp + 8 + 8] // src_u 71039de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov edx, [esp + 8 + 12] // src_v 71049de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov edi, [esp + 8 + 16] // dst_frame 71059de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com mov ecx, [esp + 8 + 20] // width 71069de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com sub edx, esi 71079de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 7108c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 71099de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com convertloop: 71109de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com movq xmm2, qword ptr [esi] // U 71119de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com movq xmm3, qword ptr [esi + edx] // V 71129de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea esi, [esi + 8] 71139de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpcklbw xmm2, xmm3 // UV 7114f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu xmm0, [eax] // Y 
71159de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com movdqa xmm1, xmm2 71169de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea eax, [eax + 16] 71179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpcklbw xmm1, xmm0 // UYVY 71189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com punpckhbw xmm2, xmm0 7119f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu [edi], xmm1 7120f8e90176855a21248ef5213b34dadd46118e76fcfbarchard@google.com movdqu [edi + 16], xmm2 71219de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com lea edi, [edi + 32] 71229de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com sub ecx, 16 71239de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com jg convertloop 71249de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 71259de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com pop edi 71269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com pop esi 71279de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ret 71289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com } 71299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 7130747ceb9fa5cea5c923d4b08acbb7f1cfa39f138efbarchard@google.com 7131ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com#ifdef HAS_ARGBPOLYNOMIALROW_SSE2 7132ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com__declspec(naked) __declspec(align(16)) 7133ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.comvoid ARGBPolynomialRow_SSE2(const uint8* src_argb, 7134ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com uint8* dst_argb, const float* poly, 7135ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com int width) { 7136ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com __asm { 7137c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com push esi 7138c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mov eax, [esp + 4 + 4] /* src_argb */ 
7139c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mov edx, [esp + 4 + 8] /* dst_argb */ 7140c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mov esi, [esp + 4 + 12] /* poly */ 7141c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mov ecx, [esp + 4 + 16] /* width */ 71426da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints. 7143ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com 7144c3b04796c2c77e69f6bd7ca294825d31eae528bffbarchard@google.com // 2 pixel loop. 7145c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 7146ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com convertloop: 71473075de82856a044ebd3e808b2f0918d2b0e9713cfbarchard@google.com// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel 71483075de82856a044ebd3e808b2f0918d2b0e9713cfbarchard@google.com// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel 7149c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com movq xmm0, qword ptr [eax] // BGRABGRA 7150c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com lea eax, [eax + 8] 7151ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com punpcklbw xmm0, xmm3 7152c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com movdqa xmm4, xmm0 7153c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com punpcklwd xmm0, xmm3 // pixel 0 7154c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com punpckhwd xmm4, xmm3 // pixel 1 7155ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com cvtdq2ps xmm0, xmm0 // 4 floats 7156c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com cvtdq2ps xmm4, xmm4 7157ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com movdqa xmm1, xmm0 // X 7158c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com movdqa xmm5, xmm4 7159c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm0, [esi + 16] // C1 * X 
7160c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm4, [esi + 16] 7161c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com addps xmm0, [esi] // result = C0 + C1 * X 7162c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com addps xmm4, [esi] 7163ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com movdqa xmm2, xmm1 7164c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com movdqa xmm6, xmm5 7165ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com mulps xmm2, xmm1 // X * X 7166c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm6, xmm5 7167ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com mulps xmm1, xmm2 // X * X * X 7168c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm5, xmm6 7169c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm2, [esi + 32] // C2 * X * X 7170c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm6, [esi + 32] 7171c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm1, [esi + 48] // C3 * X * X * X 7172c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com mulps xmm5, [esi + 48] 7173ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com addps xmm0, xmm2 // result += C2 * X * X 7174c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com addps xmm4, xmm6 7175ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com addps xmm0, xmm1 // result += C3 * X * X * X 7176c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com addps xmm4, xmm5 7177ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com cvttps2dq xmm0, xmm0 7178c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com cvttps2dq xmm4, xmm4 7179c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com packuswb xmm0, xmm4 7180ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com packuswb xmm0, xmm0 7181c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com sub ecx, 2 
7182c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com movq qword ptr [edx], xmm0 7183c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com lea edx, [edx + 8] 7184ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com jg convertloop 7185c3c06ec328b5ea6c57012d3ca3ca442f22aad681fbarchard@google.com pop esi 7186ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com ret 7187ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com } 7188ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com} 7189ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com#endif // HAS_ARGBPOLYNOMIALROW_SSE2 7190ae0091e3a74603b23c91f417c8ea023cd43e7e9cfbarchard@google.com 71916da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com#ifdef HAS_ARGBPOLYNOMIALROW_AVX2 71926da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com__declspec(naked) __declspec(align(16)) 71936da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.comvoid ARGBPolynomialRow_AVX2(const uint8* src_argb, 7194c3b04796c2c77e69f6bd7ca294825d31eae528bffbarchard@google.com uint8* dst_argb, const float* poly, 7195c3b04796c2c77e69f6bd7ca294825d31eae528bffbarchard@google.com int width) { 71966da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com __asm { 71976da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com mov eax, [esp + 4] /* src_argb */ 71986da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com mov edx, [esp + 8] /* dst_argb */ 7199446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com mov ecx, [esp + 12] /* poly */ 7200446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm4, [ecx] // C0 7201446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm5, [ecx + 16] // C1 7202446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm6, [ecx + 32] // C2 7203446f91d040aea92c0522745d176fe8017bd22382fbarchard@google.com vbroadcastf128 ymm7, [ecx + 48] // C3 
72046da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com mov ecx, [esp + 16] /* width */ 72056da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com 7206c3b04796c2c77e69f6bd7ca294825d31eae528bffbarchard@google.com // 2 pixel loop. 7207c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com align 4 72086da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com convertloop: 72092bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels 72102bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com lea eax, [eax + 8] 72112bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vcvtdq2ps ymm0, ymm0 // X 8 floats 72122bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vmulps ymm2, ymm0, ymm0 // X * X 72132bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vmulps ymm3, ymm0, ymm7 // C3 * X 72142bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X 72152bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X 72162bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X 72172bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vcvttps2dq ymm0, ymm0 72182bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000 72192bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000 72202bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000 72212bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com sub ecx, 2 72222bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com vmovq qword ptr [edx], xmm0 72232bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com lea edx, [edx + 8] 
72242bbb64df2c997725ab1a024a0a21f1c63f895797fbarchard@google.com jg convertloop 72256da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com vzeroupper 72266da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com ret 72276da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com } 72286da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com} 72296da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com#endif // HAS_ARGBPOLYNOMIALROW_AVX2 72306da76f3b34e80da2ffebff92d57fd08a93964942fbarchard@google.com 72316f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#ifdef HAS_ARGBCOLORTABLEROW_X86 72326f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com// Tranform ARGB pixels with color table. 72336f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com__declspec(naked) __declspec(align(16)) 72346f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.comvoid ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, 72356f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com int width) { 72366f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com __asm { 72376f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com push esi 72386f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov eax, [esp + 4 + 4] /* dst_argb */ 72396f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov esi, [esp + 4 + 8] /* table_argb */ 72406f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov ecx, [esp + 4 + 12] /* width */ 72417a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com 72426f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com // 1 pixel loop. 
72436f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com align 4 72446f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com convertloop: 72456f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax] 72466f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com lea eax, [eax + 4] 72476f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4] 72486f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4], dl 72496f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax - 4 + 1] 72506f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4 + 1] 72516f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4 + 1], dl 72526f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax - 4 + 2] 72536f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4 + 2] 72546f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4 + 2], dl 72556f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax - 4 + 3] 72566f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4 + 3] 72576f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4 + 3], dl 72586f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com dec ecx 72596f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com jg convertloop 72606f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pop esi 72616f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com ret 72626f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com } 72636f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com} 72646f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#endif // HAS_ARGBCOLORTABLEROW_X86 
72657a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com 72666f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#ifdef HAS_RGBCOLORTABLEROW_X86 72676f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com// Tranform RGB pixels with color table. 72687a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com__declspec(naked) __declspec(align(16)) 72696f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.comvoid RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { 72707a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com __asm { 72716f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com push esi 72726f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov eax, [esp + 4 + 4] /* dst_argb */ 72736f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov esi, [esp + 4 + 8] /* table_argb */ 72746f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov ecx, [esp + 4 + 12] /* width */ 72756f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 72766f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com // 1 pixel loop. 
72776f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com align 4 72786f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com convertloop: 72796f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax] 72806f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com lea eax, [eax + 4] 72816f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4] 72826f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4], dl 72836f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax - 4 + 1] 72846f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4 + 1] 72856f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4 + 1], dl 72866f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax - 4 + 2] 72876f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx * 4 + 2] 72886f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [eax - 4 + 2], dl 72896f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com dec ecx 72906f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com jg convertloop 72916f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 72926f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pop esi 72937a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com ret 72947a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com } 72957a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com} 72966f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#endif // HAS_RGBCOLORTABLEROW_X86 72977a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com 72986f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 72996f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com// Tranform RGB pixels with luma table. 
73006f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com__declspec(naked) __declspec(align(16)) 730111a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.comvoid ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 730211a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com int width, 730311a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com const uint8* luma, uint32 lumacoeff) { 73046f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com __asm { 73056f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com push esi 73066f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com push edi 73076f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov eax, [esp + 8 + 4] /* src_argb */ 73086f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov edi, [esp + 8 + 8] /* dst_argb */ 730911a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com mov ecx, [esp + 8 + 12] /* width */ 731011a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com movd xmm2, dword ptr [esp + 8 + 16] // luma table 731111a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff 73126f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pshufd xmm2, xmm2, 0 731321796c94aa3a448a839e6a18aad060f018958156fbarchard@google.com pshufd xmm3, xmm3, 0 731411a0d48e45a7acd5aaf6b914caeee06432f06b6bfbarchard@google.com pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 73156f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com psllw xmm4, 8 73166f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pxor xmm5, xmm5 73176f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73186f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com // 4 pixel loop. 
73196f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com align 4 73206f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com convertloop: 7321ca8f826ba3894ba1db7e58b38e6469fd1d3ab59ffbarchard@google.com movdqu xmm0, qword ptr [eax] // generate luma ptr 73226f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pmaddubsw xmm0, xmm3 73236f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com phaddw xmm0, xmm0 73246f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pand xmm0, xmm4 // mask out low bits 73256f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com punpcklwd xmm0, xmm5 73266f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com paddd xmm0, xmm2 // add table base 73276f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movd esi, xmm0 73286f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 73296f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73306f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax] 73316f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73326f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi], dl 73336f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 1] 73346f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73356f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 1], dl 73366f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 2] 73376f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73386f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 2], dl 73396f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 3] // copy alpha. 
73406f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 3], dl 73416f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73426f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movd esi, xmm0 73436f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 73446f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73456f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 4] 73466f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73476f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 4], dl 73486f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 5] 73496f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73506f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 5], dl 73516f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 6] 73526f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73536f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 6], dl 73546f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 7] // copy alpha. 
73556f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 7], dl 73566f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73576f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movd esi, xmm0 73586f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 73596f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73606f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 8] 73616f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73626f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 8], dl 73636f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 9] 73646f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73656f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 9], dl 73666f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 10] 73676f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73686f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 10], dl 73696f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 11] // copy alpha. 
73706f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 11], dl 73716f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73726f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movd esi, xmm0 73736f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73746f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 12] 73756f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73766f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 12], dl 73776f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 13] 73786f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73796f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 13], dl 73806f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 14] 73816f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [esi + edx] 73826f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 14], dl 73836f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com movzx edx, byte ptr [eax + 15] // copy alpha. 
73846f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com mov byte ptr [edi + 15], dl 73856f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73866f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com sub ecx, 4 73876f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com lea eax, [eax + 16] 73886f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com lea edi, [edi + 16] 73896f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com jg convertloop 73906f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com 73916f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pop edi 73926f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com pop esi 73936f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com ret 73947a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com } 73957a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com} 73966f7e514caa3e1c57ab1fd765151c52b9156113befbarchard@google.com#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 73977a0d01ef8ba25bdad7df1f27d8b0969f0e0a9185fbarchard@google.com 7398e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#endif // defined(_M_X64) 7399e6dd1fa024bac6c62cbed8e5227cc9bc311a9d9dfbarchard@google.com#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) 7400221e602f8a726f7457a0d521b5bcca05d89215bbfbarchard@google.com 7401fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus 74025327adda475f79405a008a967d30bf7c92e994admikhal@webrtc.org} // extern "C" 7403fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com} // namespace libyuv 7404fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif 7405