133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/* 233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Copyright 2011 The LibYuv Project Authors. All rights reserved. 333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Use of this source code is governed by a BSD-style license 533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * that can be found in the LICENSE file in the root of the source 633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * tree. An additional intellectual property rights grant can be found 733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * in the file PATENTS. All contributing project authors may 833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * be found in the AUTHORS file in the root of the source tree. 933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 1033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/scale.h" 1233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <assert.h> 1433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <string.h> 1533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <stdlib.h> // For getenv() 1633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/cpu_id.h" 1833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/planar_functions.h" // For CopyARGB 1933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/row.h" 2033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 2233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampnamespace libyuv { 2333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampextern "C" { 2433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 2533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear SSE2 is disabled. 2733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define SSE2_DISABLED 1 2833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ARGB scaling uses bilinear or point, but not box filter. 3033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 3133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * SSE2 downscalers with bilinear interpolation. 3233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 3333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) 3533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWN2_SSE2 3733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) 3833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. 3933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 4033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_SSE2(const uint8* src_ptr, 4133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t /* src_stride */, 4233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 4333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 4433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 4] // src_ptr 4533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // src_stride ignored 4633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 12] // dst_ptr 4733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 16] // dst_width 4833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 4933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 5033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp wloop: 5133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [eax] 5233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm1, [eax + 16] 5333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea eax, [eax + 32] 5433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm1, 0x88 5533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 5633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [edx], xmm0 5733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edx, [edx + 16] 5833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg wloop 5933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 6033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 6133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 6233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 6333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 6433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends 8x2 rectangle to 4x1. 6533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. 6633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 6733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, 6833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 6933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 7033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 7133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push esi 7233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 4 + 4] // src_ptr 7333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov esi, [esp + 4 + 8] // src_stride 7433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 4 + 12] // dst_ptr 7533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 4 + 16] // dst_width 7633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 7733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 7833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp wloop: 7933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [eax] 8033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm1, [eax + 16] 8133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm2, [eax + esi] 8233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm3, [eax + esi + 16] 8333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea eax, [eax + 32] 8433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, xmm2 // average rows 8533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm1, xmm3 8633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm2, xmm0 // average columns (8 to 4 pixels) 8733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm1, 0x88 // even pixels 8833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm2, xmm1, 0xdd // odd pixels 8933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, xmm2 9033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 9133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [edx], xmm0 9233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edx, [edx + 16] 9333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg wloop 9433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 9533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 9633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 9733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 9833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 9933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 10033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWNEVEN_SSE2 10133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 4 pixels at a time. 10233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned. 10333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 10433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 10533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stepx, 10633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 10733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 10833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push ebx 10933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push edi 11033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 8 + 4] // src_ptr 11133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // src_stride ignored 11233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ebx, [esp + 8 + 12] // src_stepx 11333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 8 + 16] // dst_ptr 11433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 8 + 20] // dst_width 11533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea ebx, [ebx * 4] 11633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edi, [ebx + ebx * 2] 11733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 11833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 11933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp wloop: 12033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm0, [eax] 12133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm1, [eax + ebx] 12233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpckldq xmm0, xmm1 12333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm2, [eax + ebx * 2] 12433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm3, [eax + edi] 12533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea eax, [eax + ebx * 4] 12633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpckldq xmm2, xmm3 12733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklqdq xmm0, xmm2 12833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 12933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [edx], xmm0 13033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edx, [edx + 16] 13133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg wloop 13233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 13333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 13433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop ebx 13533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 13633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 13733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 13833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 13933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends four 2x2 to 4x1. 14033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned. 14133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 14233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, 14333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 14433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stepx, 14533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 14633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 14733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push ebx 14833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push esi 14933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push edi 15033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 12 + 4] // src_ptr 15133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov esi, [esp + 12 + 8] // src_stride 15233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ebx, [esp + 12 + 12] // src_stepx 15333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 12 + 16] // dst_ptr 15433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 12 + 20] // dst_width 15533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [eax + esi] // row1 pointer 15633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea ebx, [ebx * 4] 15733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edi, [ebx + ebx * 2] 15833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 15933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 16033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp wloop: 16133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movq xmm0, qword ptr [eax] // row0 4 pairs 16233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movhps xmm0, qword ptr [eax + ebx] 16333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movq xmm1, qword ptr [eax + ebx * 2] 16433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movhps xmm1, qword ptr [eax + edi] 16533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea eax, [eax + ebx * 4] 16633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movq xmm2, qword ptr [esi] // row1 4 pairs 16733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movhps xmm2, qword ptr [esi + ebx] 16833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movq xmm3, qword ptr [esi + ebx * 2] 16933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movhps xmm3, qword ptr [esi + edi] 17033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + ebx * 4] 17133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, xmm2 // average rows 17233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm1, xmm3 17333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm2, xmm0 // average columns (8 to 4 pixels) 17433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm1, 0x88 // even pixels 17533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm2, xmm1, 0xdd // odd pixels 17633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, xmm2 17733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 17833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [edx], xmm0 17933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea edx, [edx + 16] 18033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg wloop 18133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 18233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 18333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 18433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop ebx 18533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 18633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 18733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 18833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 18933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version. 19033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifndef SSE2_DISABLED 19133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED 19233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 19333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, 19433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int dst_width, 19533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int source_y_fraction) { 19633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 19733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push esi 19833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push edi 19933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edi, [esp + 8 + 4] // dst_ptr 20033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov esi, [esp + 8 + 8] // src_ptr 20133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 8 + 12] // src_stride 20233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 8 + 16] // dst_width 20333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 20433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub edi, esi 20533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp cmp eax, 0 20633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp je xloop1 20733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp cmp eax, 128 20833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp je xloop2 20933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 21033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm5, eax // xmm5 = y fraction 21133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklbw xmm5, xmm5 21233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklwd xmm5, xmm5 21333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pshufd xmm5, xmm5, 0 21433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pxor xmm4, xmm4 21533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 21633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // f * row1 + (1 - frac) row0 21733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // frac * (row1 - row0) + row0 21833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 21933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop: 22033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] // row0 22133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm2, [esi + edx] // row1 22233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm1, xmm0 22333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm3, xmm2 22433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklbw xmm2, xmm4 22533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpckhbw xmm3, xmm4 22633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklbw xmm0, xmm4 22733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpckhbw xmm1, xmm4 22833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp psubw xmm2, xmm0 // row1 - row0 22933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp psubw xmm3, xmm1 23033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pmulhw xmm2, xmm5 // scale diff 23133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pmulhw xmm3, xmm5 23233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp paddw xmm0, xmm2 // sum rows 23333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp paddw xmm1, xmm3 23433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp packuswb xmm0, xmm1 23533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 23633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 23733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 23833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop 23933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 24033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 24133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 // duplicate last pixel for filtering 24233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 24333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 24433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 24533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 24633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 24733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop1: 24833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] 24933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 25033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 25133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 25233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop1 25333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 25433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 25533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 25633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 25733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 25833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 25933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 26033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 26133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop2: 26233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] 26333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, [esi + edx] 26433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 26533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 26633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 26733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop2 26833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 26933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 27033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 27133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 27233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 27333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 27433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 27533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 27633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // SSE2_DISABLED 27733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 27833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version. 27933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSSE3 28033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 28133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 28233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int dst_width, 28333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int source_y_fraction) { 28433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 28533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push esi 28633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp push edi 28733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edi, [esp + 8 + 4] // dst_ptr 28833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov esi, [esp + 8 + 8] // src_ptr 28933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov edx, [esp + 8 + 12] // src_stride 29033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov ecx, [esp + 8 + 16] // dst_width 29133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp mov eax, [esp + 8 + 20] // source_y_fraction (0..255) 29233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub edi, esi 29333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shr eax, 1 29433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp cmp eax, 0 29533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp je xloop1 29633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp cmp eax, 64 29733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp je xloop2 29833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm0, eax // high fraction 0..127 29933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp neg eax 30033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp add eax, 128 30133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movd xmm5, eax // low fraction 128..1 30233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklbw xmm5, xmm0 30333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklwd xmm5, xmm5 30433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pshufd xmm5, xmm5, 0 30533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 30633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 30733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop: 30833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] 30933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm2, [esi + edx] 31033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm1, xmm0 31133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpcklbw xmm0, xmm2 31233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp punpckhbw xmm1, xmm2 31333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pmaddubsw xmm0, xmm5 31433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pmaddubsw xmm1, xmm5 31533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp psrlw xmm0, 7 31633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp psrlw xmm1, 7 31733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp packuswb xmm0, xmm1 31833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 31933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 32033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 32133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop 32233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 32333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 32433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 // duplicate last pixel for filtering 32533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 32633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 32733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 32833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 32933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 33033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop1: 33133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] 33233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 33333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 33433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 33533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop1 33633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 33733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 33833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 33933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 34033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 34133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 34233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 34333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 34433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xloop2: 34533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa xmm0, [esi] 34633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pavgb xmm0, [esi + edx] 34733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 4 34833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 34933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp lea esi, [esi + 16] 35033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg xloop2 35133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 35233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp shufps xmm0, xmm0, 0xff 35333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp movdqa [esi + edi], xmm0 35433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop edi 35533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp pop esi 35633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ret 35733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 35833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 35933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 36033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) 36133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 36233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// GCC versions of row functions are verbatim conversions from Visual C. 36333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Generated using gcc disassembly on Visual C object file: 36433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// objdump -D yuvscaler.obj >yuvscaler.txt 36533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWN2_SSE2 36633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_SSE2(const uint8* src_ptr, 36733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t /* src_stride */, 36833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 36933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 37033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 37133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 37233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 37333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 37433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 37533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 37633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 37733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 37833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 37933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 38033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_ptr), // %0 38133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_ptr), // %1 38233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width) // %2 38333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 38433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 38533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 38633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1" 38733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 38833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 38933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 39033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 39133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, 39233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 39333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 39433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 39533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 39633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 39733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 39833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0),%%xmm1 \n" 39933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%3,1),%%xmm2 \n" 40033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa 0x10(%0,%3,1),%%xmm3 \n" 40133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x20(%0),%0 \n" 40233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 40333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 40433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 40533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 40633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm2 \n" 40733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 40833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 40933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1) \n" 41033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 41133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 41233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_ptr), // %0 41333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_ptr), // %1 41433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width) // %2 41533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride)) // %3 41633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 41733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 41833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3" 41933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 42033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 42133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 42233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 42333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWNEVEN_SSE2 42433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 4 pixels at a time. 42533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned. 42633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 42733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stepx, 42833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 42933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t src_stepx_x4 = static_cast<intptr_t>(src_stepx); 43033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t src_stepx_x12 = 0; 43133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 43233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x0(,%1,4),%1 \n" 43333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%1,2),%4 \n" 43433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 43533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 43633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0),%%xmm0 \n" 43733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%1,1),%%xmm1 \n" 43833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm1,%%xmm0 \n" 43933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%1,2),%%xmm2 \n" 44033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd (%0,%4,1),%%xmm3 \n" 44133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%1,4),%0 \n" 44233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm3,%%xmm2 \n" 44333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklqdq %%xmm2,%%xmm0 \n" 44433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 44533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%2) \n" 44633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 44733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 44833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_ptr), // %0 44933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_stepx_x4), // %1 45033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_ptr), // %2 45133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width), // %3 45233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_stepx_x12) // %4 45333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 45433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 45533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 45633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3" 45733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 45833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 45933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 46033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 46133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends four 2x2 to 4x1. 46233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned. 46333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr, 46433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int src_stepx, 46533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 46633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t src_stepx_x4 = static_cast<intptr_t>(src_stepx); 46733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t src_stepx_x12 = 0; 46833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp intptr_t row1 = static_cast<intptr_t>(src_stride); 46933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 47033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x0(,%1,4),%1 \n" 47133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%1,2),%4 \n" 47233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%5,1),%5 \n" 47333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 47433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 47533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm0 \n" 47633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps (%0,%1,1),%%xmm0 \n" 47733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0,%1,2),%%xmm1 \n" 47833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps (%0,%4,1),%%xmm1 \n" 47933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%1,4),%0 \n" 48033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%5),%%xmm2 \n" 48133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps (%5,%1,1),%%xmm2 \n" 48233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%5,%1,2),%%xmm3 \n" 48333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhps (%5,%4,1),%%xmm3 \n" 48433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%5,%1,4),%5 \n" 48533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 48633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm3,%%xmm1 \n" 48733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 48833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0x88,%%xmm1,%%xmm0 \n" 48933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xdd,%%xmm1,%%xmm2 \n" 49033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb %%xmm2,%%xmm0 \n" 49133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%3 \n" 49233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%2) \n" 49333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%2),%2 \n" 49433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 49533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_ptr), // %0 49633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_stepx_x4), // %1 49733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_ptr), // %2 49833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+rm"(dst_width), // %3 49933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_stepx_x12), // %4 50033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(row1) // %5 50133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 50233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 50333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 50433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3" 50533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 50633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 50733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 50833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 50933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifndef SSE2_DISABLED 51033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version 51133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED 51233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr, 51333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int dst_width, 51433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int source_y_fraction) { 51533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 51633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%0 \n" 51733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x0,%3 \n" 51833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 2f \n" 51933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x80,%3 \n" 52033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 3f \n" 52133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm5 \n" 52233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm5 \n" 52333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm5,%%xmm5 \n" 52433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 52533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pxor %%xmm4,%%xmm4 \n" 52633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 52733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 52833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 52933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1,%4,1),%%xmm2 \n" 53033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 53133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 53233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm4,%%xmm2 \n" 53333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm4,%%xmm3 \n" 53433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm4,%%xmm0 \n" 53533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm4,%%xmm1 \n" 53633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw %%xmm0,%%xmm2 \n" 53733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psubw %%xmm1,%%xmm3 \n" 53833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhw %%xmm5,%%xmm2 \n" 53933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmulhw %%xmm5,%%xmm3 \n" 54033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm2,%%xmm0 \n" 54133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "paddw %%xmm3,%%xmm1 \n" 54233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 54333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 54433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 54533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 54633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 54733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 54833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 54933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "2: \n" 55033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 55133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 55233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 55333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 55433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 2b \n" 55533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 55633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 55733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "3: \n" 55833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 55933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%1,%4,1),%%xmm0 \n" 56033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 56133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 56233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 56333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 56433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 3b \n" 56533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 56633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "4: \n" 56733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xff,%%xmm0,%%xmm0 \n" 56833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 56933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(dst_ptr), // %0 57033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_ptr), // %1 57133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width), // %2 57233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(source_y_fraction) // %3 57333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride)) // %4 57433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 57533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 57633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 57733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 57833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 57933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 58033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // SSE2_DISABLED 58133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 58233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version 58333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSSE3 58433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 58533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, int dst_width, 58633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int source_y_fraction) { 58733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 58833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1,%0 \n" 58933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shr %3 \n" 59033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x0,%3 \n" 59133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 2f \n" 59233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "cmp $0x40,%3 \n" 59333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "je 3f \n" 59433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm0 \n" 59533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 59633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add $0x80,%3 \n" 59733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movd %3,%%xmm5 \n" 59833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm0,%%xmm5 \n" 59933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm5,%%xmm5 \n" 60033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pshufd $0x0,%%xmm5,%%xmm5 \n" 60133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 60233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 60333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 60433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1,%4,1),%%xmm2 \n" 60533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 60633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm2,%%xmm0 \n" 60733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm2,%%xmm1 \n" 60833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm5,%%xmm0 \n" 60933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pmaddubsw %%xmm5,%%xmm1 \n" 61033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm0 \n" 61133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "psrlw $0x7,%%xmm1 \n" 61233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "packuswb %%xmm1,%%xmm0 \n" 61333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 61433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 61533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 61633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 61733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 61833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 61933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "2: \n" 62033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 62133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 62233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 62333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 62433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 2b \n" 62533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jmp 4f \n" 62633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 62733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "3: \n" 62833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%1),%%xmm0 \n" 62933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pavgb (%1,%4,1),%%xmm0 \n" 63033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x4,%2 \n" 63133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 63233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%1),%1 \n" 63333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 3b \n" 63433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "4: \n" 63533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 63633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "shufps $0xff,%%xmm0,%%xmm0 \n" 63733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,(%1,%0,1) \n" 63833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(dst_ptr), // %0 63933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(src_ptr), // %1 64033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_width), // %2 64133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(source_y_fraction) // %3 64233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride)) // %4 64333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 64433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__) 64533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm5" 64633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 64733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 64833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 64933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // defined(__x86_64__) || defined(__i386__) 65033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 65133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_C(const uint8* src_ptr, 65233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t /* src_stride */, 65333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 65433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint32* src = reinterpret_cast<const uint32*>(src_ptr); 65533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32* dst = reinterpret_cast<uint32*>(dst_ptr); 65633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 65733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int x = 0; x < dst_width - 1; x += 2) { 65833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[0]; 65933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[1] = src[2]; 66033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += 4; 66133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += 2; 66233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 66333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width & 1) { 66433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[0]; 66533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 66633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 66733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 66833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_C(const uint8* src_ptr, ptrdiff_t src_stride, 66933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 67033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int x = 0; x < dst_width; ++x) { 67133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[0] = (src_ptr[0] + src_ptr[4] + 67233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride] + src_ptr[src_stride + 4] + 2) >> 2; 67333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[1] = (src_ptr[1] + src_ptr[5] + 67433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 1] + src_ptr[src_stride + 5] + 2) >> 2; 67533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[2] = (src_ptr[2] + src_ptr[6] + 67633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 2] + src_ptr[src_stride + 6] + 2) >> 2; 67733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[3] = (src_ptr[3] + src_ptr[7] + 67833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 3] + src_ptr[src_stride + 7] + 2) >> 2; 67933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += 8; 68033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += 4; 68133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 68233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 68333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 68433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_C(const uint8* src_ptr, ptrdiff_t /* src_stride */, 68533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stepx, 68633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 68733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint32* src = reinterpret_cast<const uint32*>(src_ptr); 68833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32* dst = reinterpret_cast<uint32*>(dst_ptr); 68933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 69033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int x = 0; x < dst_width - 1; x += 2) { 69133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[0]; 69233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[1] = src[src_stepx]; 69333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += src_stepx * 2; 69433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += 2; 69533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 69633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width & 1) { 69733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[0]; 69833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 69933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 70033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 70133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_C(const uint8* src_ptr, 70233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 70333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stepx, 70433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) { 70533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int x = 0; x < dst_width; ++x) { 70633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[0] = (src_ptr[0] + src_ptr[4] + 70733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride] + src_ptr[src_stride + 4] + 2) >> 2; 70833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[1] = (src_ptr[1] + src_ptr[5] + 70933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 1] + src_ptr[src_stride + 5] + 2) >> 2; 71033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[2] = (src_ptr[2] + src_ptr[6] + 71133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 2] + src_ptr[src_stride + 6] + 2) >> 2; 71233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[3] = (src_ptr[3] + src_ptr[7] + 71333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr[src_stride + 3] + src_ptr[src_stride + 7] + 2) >> 2; 71433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += src_stepx * 4; 71533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += 4; 71633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 71733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 71833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 71933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// (1-f)a + fb can be replaced with a + f(b-a) 72033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 72133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BLENDER1(a, b, f) (static_cast<int>(a) + \ 72233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ((f) * (static_cast<int>(b) - static_cast<int>(a)) >> 16)) 72333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 72433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BLENDERC(a, b, f, s) static_cast<uint32>( \ 72533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) 72633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 72733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define BLENDER(a, b, f) \ 72833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ 72933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) 73033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 73133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, 73233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int x, int dx) { 73333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint32* src = reinterpret_cast<const uint32*>(src_ptr); 73433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32* dst = reinterpret_cast<uint32*>(dst_ptr); 73533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int j = 0; j < dst_width - 1; j += 2) { 73633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int xi = x >> 16; 73733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32 a = src[xi]; 73833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32 b = src[xi + 1]; 73933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = BLENDER(a, b, x & 0xffff); 74033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp x += dx; 74133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp xi = x >> 16; 74233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp a = src[xi]; 74333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp b = src[xi + 1]; 74433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[1] = BLENDER(a, b, x & 0xffff); 74533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp x += dx; 74633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += 2; 74733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 74833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width & 1) { 74933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int xi = x >> 16; 75033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32 a = src[xi]; 75133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32 b = src[xi + 1]; 75233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = BLENDER(a, b, x & 0xffff); 75333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 75433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 75533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 75633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic const int kMaxInputWidth = 2560; 75733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 75833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// C version 2x2 -> 2x1 75933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr, 76033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 76133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int source_y_fraction) { 76233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(dst_width > 0); 76333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int y1_fraction = source_y_fraction; 76433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int y0_fraction = 256 - y1_fraction; 76533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr1 = src_ptr + src_stride; 76633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* end = dst_ptr + (dst_width << 2); 76733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp do { 76833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 76933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 77033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8; 77133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8; 77233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[4] = (src_ptr[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8; 77333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[5] = (src_ptr[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8; 77433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[6] = (src_ptr[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8; 77533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[7] = (src_ptr[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8; 77633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += 8; 77733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr1 += 8; 77833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += 8; 77933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } while (dst_ptr < end); 78033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Duplicate the last pixel (4 bytes) for filtering. 78133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[0] = dst_ptr[-4]; 78233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[1] = dst_ptr[-3]; 78333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[2] = dst_ptr[-2]; 78433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr[3] = dst_ptr[-1]; 78533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 78633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 78733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 78833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB, 1/2 78933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 79033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * This is an optimized version for scaling down a ARGB to 1/2 of 79133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * its original size. 79233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 79333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 79433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBDown2(int /* src_width */, int /* src_height */, 79533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 79633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stride, int dst_stride, 79733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr, uint8* dst_ptr, 79833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp FilterMode filtering) { 79933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*ScaleARGBRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, 80033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_ptr, int dst_width) = 80133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C; 80233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBROWDOWN2_SSE2) 80333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && 80433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst_width, 4) && 80533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && 80633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 80733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 : 80833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDown2_SSE2; 80933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 81033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 81133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 81233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // TODO(fbarchard): Loop through source height to allow odd height. 81333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int y = 0; y < dst_height; ++y) { 81433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDown2(src_ptr, src_stride, dst_ptr, dst_width); 81533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += (src_stride << 1); 81633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += dst_stride; 81733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 81833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 81933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 82033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 82133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB Even 82233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 82333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * This is an optimized version for scaling down a ARGB to even 82433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * multiple of its original size. 82533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 82633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 82733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBDownEven(int src_width, int src_height, 82833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 82933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stride, int dst_stride, 83033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr, uint8* dst_ptr, 83133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp FilterMode filtering) { 83233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(IS_ALIGNED(src_width, 2)); 83333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(IS_ALIGNED(src_height, 2)); 83433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*ScaleARGBRowDownEven)(const uint8* src_ptr, ptrdiff_t src_stride, 83533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_step, uint8* dst_ptr, int dst_width) = 83633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp filtering ? ScaleARGBRowDownEvenInt_C : ScaleARGBRowDownEven_C; 83733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) 83833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && 83933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst_width, 4) && 84033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { 84133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenInt_SSE2 : 84233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDownEven_SSE2; 84333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 84433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 84533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_step = src_width / dst_width; 84633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Adjust to point to center of box. 84733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int row_step = src_height / dst_height; 84833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int row_stride = row_step * src_stride; 84933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += ((row_step >> 1) - 1) * src_stride + ((src_step >> 1) - 1) * 4; 85033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int y = 0; y < dst_height; ++y) { 85133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBRowDownEven(src_ptr, src_stride, src_step, dst_ptr, dst_width); 85233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_ptr += row_stride; 85333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += dst_stride; 85433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 85533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 85633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 85733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions, with bilinear 85833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * interpolation. 85933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 86033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 86133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBBilinear(int src_width, int src_height, 86233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 86333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stride, int dst_stride, 86433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr, uint8* dst_ptr) { 86533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(dst_width > 0); 86633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(dst_height > 0); 86733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp assert(src_width <= kMaxInputWidth); 86833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp SIMD_ALIGNED(uint8 row[kMaxInputWidth * 4 + 16]); 86933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, 87033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ptrdiff_t src_stride, 87133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int source_y_fraction) = 87233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBFilterRows_C; 87333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBFILTERROWS_SSE2) 87433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && 87533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { 87633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2; 87733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 87833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 87933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) 88033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSSE3) && 88133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) { 88233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3; 88333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 88433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 88533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dx = (src_width << 16) / dst_width; 88633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dy = (src_height << 16) / dst_height; 88733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); 88833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); 88933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; 89033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int j = 0; j < dst_height; ++j) { 89133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int yi = y >> 16; 89233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int yf = (y >> 8) & 255; 89333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src = src_ptr + yi * src_stride; 89433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBFilterRows(row, src, src_stride, src_width, yf); 89533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBFilterCols_C(dst_ptr, row, dst_width, x, dx); 89633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += dst_stride; 89733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp y += dy; 89833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (y > maxy) { 89933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp y = maxy; 90033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 90133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 90233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 90333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 90433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Scales a single row of pixels using point sampling. 90533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Code is adapted from libyuv bilinear yuv scaling, but with bilinear 90633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// interpolation off, and argb pixels instead of yuv. 90733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBCols(uint8* dst_ptr, const uint8* src_ptr, 90833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int x, int dx) { 90933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint32* src = reinterpret_cast<const uint32*>(src_ptr); 91033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint32* dst = reinterpret_cast<uint32*>(dst_ptr); 91133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int j = 0; j < dst_width - 1; j += 2) { 91233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[x >> 16]; 91333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp x += dx; 91433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[1] = src[x >> 16]; 91533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp x += dx; 91633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += 2; 91733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 91833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width & 1) { 91933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst[0] = src[x >> 16]; 92033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 92133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 92233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 92333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 92433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions, without interpolation. 92533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Fixed point math is used for performance: The upper 16 bits 92633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * of x and dx is the integer part of the source position and 92733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * the lower 16 bits are the fixed decimal part. 92833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 92933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 93033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBSimple(int src_width, int src_height, 93133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 93233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stride, int dst_stride, 93333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr, uint8* dst_ptr) { 93433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dx = (src_width << 16) / dst_width; 93533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dy = (src_height << 16) / dst_height; 93633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); 93733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); 93833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < dst_height; ++i) { 93933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); 94033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_ptr += dst_stride; 94133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp y += dy; 94233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 94333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 94433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 94533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/** 94633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions. 94733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 94833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBAnySize(int src_width, int src_height, 94933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 95033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_stride, int dst_stride, 95133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_ptr, uint8* dst_ptr, 95233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp FilterMode filtering) { 95333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (!filtering || (src_width > kMaxInputWidth)) { 95433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBSimple(src_width, src_height, dst_width, dst_height, 95533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride, dst_stride, src_ptr, dst_ptr); 95633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } else { 95733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBBilinear(src_width, src_height, dst_width, dst_height, 95833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride, dst_stride, src_ptr, dst_ptr); 95933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 96033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 96133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 96233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ScaleARGB a ARGB. 96333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// 96433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// This function in turn calls a scaling function 96533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// suitable for handling the desired resolutions. 96633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 96733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGB(const uint8* src, int src_stride, 96833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_width, int src_height, 96933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst, int dst_stride, 97033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 97133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp FilterMode filtering) { 97233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef CPU_X86 97333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // environment variable overrides for testing. 97433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp char *filter_override = getenv("LIBYUV_FILTER"); 97533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (filter_override) { 97633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp filtering = (FilterMode)atoi(filter_override); // NOLINT 97733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 97833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 97933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width == src_width && dst_height == src_height) { 98033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Straight copy. 98133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ARGBCopy(src, src_stride, dst, dst_stride, dst_width, dst_height); 98233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return; 98333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 98433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (2 * dst_width == src_width && 2 * dst_height == src_height) { 98533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Optimized 1/2. 98633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBDown2(src_width, src_height, dst_width, dst_height, 98733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride, dst_stride, src, dst, filtering); 98833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return; 98933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 99033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int scale_down_x = src_width / dst_width; 99133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int scale_down_y = src_height / dst_height; 99233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (dst_width * scale_down_x == src_width && 99333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_height * scale_down_y == src_height) { 99433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (!(scale_down_x & 1) && !(scale_down_y & 1)) { 99533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Optimized even scale down. ie 4, 6, 8, 10x 99633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBDownEven(src_width, src_height, dst_width, dst_height, 99733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride, dst_stride, src, dst, filtering); 99833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return; 99933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 100033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if ((scale_down_x & 1) && (scale_down_y & 1)) { 100133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp filtering = kFilterNone; 100233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 100333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 100433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Arbitrary scale up and/or down. 100533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGBAnySize(src_width, src_height, dst_width, dst_height, 100633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride, dst_stride, src, dst, filtering); 100733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 100833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 100933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ScaleARGB an ARGB image. 101033cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 101133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampint ARGBScale(const uint8* src_argb, int src_stride_argb, 101233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int src_width, int src_height, 101333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_argb, int dst_stride_argb, 101433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_width, int dst_height, 101533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp FilterMode filtering) { 101633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (!src_argb || src_width <= 0 || src_height == 0 || 101733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp !dst_argb || dst_width <= 0 || dst_height <= 0) { 101833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return -1; 101933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 102033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Negative height means invert the image. 102133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (src_height < 0) { 102233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_height = -src_height; 102333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_argb = src_argb + (src_height - 1) * src_stride_argb; 102433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_stride_argb = -src_stride_argb; 102533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 102633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ScaleARGB(src_argb, src_stride_argb, src_width, src_height, 102733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_argb, dst_stride_argb, dst_width, dst_height, 102833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp filtering); 102933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return 0; 103033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 103133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 103233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 103333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // extern "C" 103433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // namespace libyuv 103533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 1036