133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/*
233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *
433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  Use of this source code is governed by a BSD-style license
533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  that can be found in the LICENSE file in the root of the source
633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  tree. An additional intellectual property rights grant can be found
733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  in the file PATENTS.  All contributing project authors may
833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *  be found in the AUTHORS file in the root of the source tree.
933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
1033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
1133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/scale.h"
1233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
1333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <assert.h>
1433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <string.h>
1533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include <stdlib.h>  // For getenv()
1633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
1733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/cpu_id.h"
1833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/planar_functions.h"  // For CopyARGB
1933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/row.h"
2033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
2133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus
2233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampnamespace libyuv {
2333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampextern "C" {
2433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
2533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
2633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear SSE2 is disabled.
2733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define SSE2_DISABLED 1
2833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
2933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ARGB scaling uses bilinear or point, but not box filter.
3033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
3133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * SSE2 downscalers with bilinear interpolation.
3233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
3333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
3433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
3533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
3633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWN2_SSE2
3733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
3833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
3933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
4033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_SSE2(const uint8* src_ptr,
4133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                   ptrdiff_t /* src_stride */,
4233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                   uint8* dst_ptr, int dst_width) {
4333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
4433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 4]        // src_ptr
4533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                     // src_stride ignored
4633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 12]       // dst_ptr
4733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 16]       // dst_width
4833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
4933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
5033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  wloop:
5133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [eax]
5233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm1, [eax + 16]
5333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        eax,  [eax + 32]
5433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm1, 0x88
5533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
5633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [edx], xmm0
5733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edx, [edx + 16]
5833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         wloop
5933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
6033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
6133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
6233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
6333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
6433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends 8x2 rectangle to 4x1.
6533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
6633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
6733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr,
6833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      ptrdiff_t src_stride,
6933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      uint8* dst_ptr, int dst_width) {
7033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
7133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       esi
7233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 4 + 4]    // src_ptr
7333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        esi, [esp + 4 + 8]    // src_stride
7433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 4 + 12]   // dst_ptr
7533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 4 + 16]   // dst_width
7633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
7733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
7833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  wloop:
7933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [eax]
8033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm1, [eax + 16]
8133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm2, [eax + esi]
8233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm3, [eax + esi + 16]
8333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        eax,  [eax + 32]
8433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, xmm2            // average rows
8533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm1, xmm3
8633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
8733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm1, 0x88      // even pixels
8833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm2, xmm1, 0xdd      // odd pixels
8933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, xmm2
9033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
9133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [edx], xmm0
9233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edx, [edx + 16]
9333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         wloop
9433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
9533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
9633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
9733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
9833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
9933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
10033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWNEVEN_SSE2
10133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 4 pixels at a time.
10233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned.
10333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
10433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
10533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               int src_stepx,
10633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               uint8* dst_ptr, int dst_width) {
10733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
10833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       ebx
10933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       edi
11033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 8 + 4]    // src_ptr
11133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                     // src_stride ignored
11233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ebx, [esp + 8 + 12]   // src_stepx
11333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 8 + 16]   // dst_ptr
11433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 8 + 20]   // dst_width
11533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        ebx, [ebx * 4]
11633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edi, [ebx + ebx * 2]
11733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
11833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
11933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  wloop:
12033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm0, [eax]
12133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm1, [eax + ebx]
12233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpckldq  xmm0, xmm1
12333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm2, [eax + ebx * 2]
12433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm3, [eax + edi]
12533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        eax,  [eax + ebx * 4]
12633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpckldq  xmm2, xmm3
12733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklqdq xmm0, xmm2
12833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
12933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [edx], xmm0
13033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edx, [edx + 16]
13133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         wloop
13233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
13333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
13433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        ebx
13533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
13633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
13733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
13833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
13933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends four 2x2 to 4x1.
14033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned.
14133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
14233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr,
14333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                         ptrdiff_t src_stride,
14433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                         int src_stepx,
14533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                         uint8* dst_ptr, int dst_width) {
14633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
14733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       ebx
14833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       esi
14933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       edi
15033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 12 + 4]    // src_ptr
15133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        esi, [esp + 12 + 8]    // src_stride
15233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ebx, [esp + 12 + 12]   // src_stepx
15333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 12 + 16]   // dst_ptr
15433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 12 + 20]   // dst_width
15533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [eax + esi]      // row1 pointer
15633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        ebx, [ebx * 4]
15733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edi, [ebx + ebx * 2]
15833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
15933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
16033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  wloop:
16133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movq       xmm0, qword ptr [eax] // row0 4 pairs
16233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movhps     xmm0, qword ptr [eax + ebx]
16333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movq       xmm1, qword ptr [eax + ebx * 2]
16433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movhps     xmm1, qword ptr [eax + edi]
16533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        eax,  [eax + ebx * 4]
16633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movq       xmm2, qword ptr [esi] // row1 4 pairs
16733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movhps     xmm2, qword ptr [esi + ebx]
16833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movq       xmm3, qword ptr [esi + ebx * 2]
16933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movhps     xmm3, qword ptr [esi + edi]
17033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi,  [esi + ebx * 4]
17133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, xmm2            // average rows
17233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm1, xmm3
17333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
17433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm1, 0x88      // even pixels
17533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm2, xmm1, 0xdd      // odd pixels
17633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, xmm2
17733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
17833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [edx], xmm0
17933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        edx, [edx + 16]
18033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         wloop
18133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
18233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
18333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
18433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        ebx
18533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
18633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
18733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
18833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
18933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version.
19033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifndef SSE2_DISABLED
19133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
19233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
19333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
19433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              ptrdiff_t src_stride, int dst_width,
19533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int source_y_fraction) {
19633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
19733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       esi
19833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       edi
19933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edi, [esp + 8 + 4]   // dst_ptr
20033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        esi, [esp + 8 + 8]   // src_ptr
20133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 8 + 12]  // src_stride
20233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 8 + 16]  // dst_width
20333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
20433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        edi, esi
20533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    cmp        eax, 0
20633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    je         xloop1
20733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    cmp        eax, 128
20833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    je         xloop2
20933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
21033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm5, eax            // xmm5 = y fraction
21133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklbw  xmm5, xmm5
21233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklwd  xmm5, xmm5
21333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pshufd     xmm5, xmm5, 0
21433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pxor       xmm4, xmm4
21533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
21633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    // f * row1 + (1 - frac) row0
21733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    // frac * (row1 - row0) + row0
21833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
21933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop:
22033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]  // row0
22133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm2, [esi + edx]  // row1
22233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm1, xmm0
22333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm3, xmm2
22433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklbw  xmm2, xmm4
22533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpckhbw  xmm3, xmm4
22633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklbw  xmm0, xmm4
22733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpckhbw  xmm1, xmm4
22833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    psubw      xmm2, xmm0  // row1 - row0
22933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    psubw      xmm3, xmm1
23033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pmulhw     xmm2, xmm5  // scale diff
23133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pmulhw     xmm3, xmm5
23233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    paddw      xmm0, xmm2  // sum rows
23333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    paddw      xmm1, xmm3
23433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    packuswb   xmm0, xmm1
23533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
23633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
23733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
23833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop
23933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
24033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
24133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0    // duplicate last pixel for filtering
24233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
24333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
24433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
24533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
24633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
24733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop1:
24833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]
24933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
25033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
25133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
25233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop1
25333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
25433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
25533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
25633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
25733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
25833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
25933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
26033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
26133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop2:
26233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]
26333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, [esi + edx]
26433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
26533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
26633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
26733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop2
26833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
26933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
27033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
27133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
27233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
27333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
27433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
27533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
27633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif  // SSE2_DISABLED
27733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
27833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
27933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSSE3
28033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16))
28133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
28233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               ptrdiff_t src_stride, int dst_width,
28333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               int source_y_fraction) {
28433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  __asm {
28533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       esi
28633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    push       edi
28733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edi, [esp + 8 + 4]   // dst_ptr
28833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        esi, [esp + 8 + 8]   // src_ptr
28933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        edx, [esp + 8 + 12]  // src_stride
29033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        ecx, [esp + 8 + 16]  // dst_width
29133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
29233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        edi, esi
29333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shr        eax, 1
29433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    cmp        eax, 0
29533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    je         xloop1
29633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    cmp        eax, 64
29733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    je         xloop2
29833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm0, eax  // high fraction 0..127
29933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    neg        eax
30033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    add        eax, 128
30133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movd       xmm5, eax  // low fraction 128..1
30233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklbw  xmm5, xmm0
30333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklwd  xmm5, xmm5
30433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pshufd     xmm5, xmm5, 0
30533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
30633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
30733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop:
30833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]
30933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm2, [esi + edx]
31033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm1, xmm0
31133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpcklbw  xmm0, xmm2
31233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    punpckhbw  xmm1, xmm2
31333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pmaddubsw  xmm0, xmm5
31433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pmaddubsw  xmm1, xmm5
31533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    psrlw      xmm0, 7
31633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    psrlw      xmm1, 7
31733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    packuswb   xmm0, xmm1
31833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
31933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
32033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
32133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop
32233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
32333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
32433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0    // duplicate last pixel for filtering
32533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
32633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
32733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
32833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
32933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
33033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop1:
33133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]
33233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
33333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
33433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
33533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop1
33633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
33733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
33833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
33933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
34033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
34133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
34233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
34333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    align      16
34433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  xloop2:
34533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     xmm0, [esi]
34633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pavgb      xmm0, [esi + edx]
34733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    sub        ecx, 4
34833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
34933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    lea        esi, [esi + 16]
35033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    jg         xloop2
35133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
35233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    shufps     xmm0, xmm0, 0xff
35333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    movdqa     [esi + edi], xmm0
35433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        edi
35533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    pop        esi
35633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ret
35733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
35833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
35933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
36033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
36133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
36233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// GCC versions of row functions are verbatim conversions from Visual C.
36333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Generated using gcc disassembly on Visual C object file:
36433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// objdump -D yuvscaler.obj >yuvscaler.txt
36533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWN2_SSE2
36633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_SSE2(const uint8* src_ptr,
36733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                   ptrdiff_t /* src_stride */,
36833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                   uint8* dst_ptr, int dst_width) {
36933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
37033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
37133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
37233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%0),%%xmm0                     \n"
37333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    0x10(%0),%%xmm1                 \n"
37433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x20(%0),%0                     \n"
37533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0x88,%%xmm1,%%xmm0             \n"
37633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
37733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1)                     \n"
37833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
37933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
38033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(src_ptr),   // %0
38133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_ptr),   // %1
38233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_width)  // %2
38333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  :
38433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
38533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
38633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1"
38733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
38833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
38933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
39033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
39133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr,
39233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      ptrdiff_t src_stride,
39333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      uint8* dst_ptr, int dst_width) {
39433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
39533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
39633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
39733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%0),%%xmm0                     \n"
39833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    0x10(%0),%%xmm1                 \n"
39933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%0,%3,1),%%xmm2                \n"
40033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    0x10(%0,%3,1),%%xmm3            \n"
40133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x20(%0),%0                     \n"
40233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm2,%%xmm0                   \n"
40333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm3,%%xmm1                   \n"
40433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,%%xmm2                   \n"
40533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0x88,%%xmm1,%%xmm0             \n"
40633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
40733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm2,%%xmm0                   \n"
40833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
40933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1)                     \n"
41033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
41133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
41233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(src_ptr),    // %0
41333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_ptr),    // %1
41433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_width)   // %2
41533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "r"(static_cast<intptr_t>(src_stride))   // %3
41633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
41733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
41833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1", "xmm2", "xmm3"
41933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
42033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
42133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
42233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
42333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBROWDOWNEVEN_SSE2
42433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 4 pixels at a time.
42533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned.
42633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
42733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               int src_stepx,
42833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               uint8* dst_ptr, int dst_width) {
42933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  intptr_t src_stepx_x4 = static_cast<intptr_t>(src_stepx);
43033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  intptr_t src_stepx_x12 = 0;
43133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
43233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x0(,%1,4),%1                   \n"
43333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%1,%1,2),%4                    \n"
43433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
43533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
43633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      (%0),%%xmm0                     \n"
43733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      (%0,%1,1),%%xmm1                \n"
43833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpckldq %%xmm1,%%xmm0                   \n"
43933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      (%0,%1,2),%%xmm2                \n"
44033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      (%0,%4,1),%%xmm3                \n"
44133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%0,%1,4),%0                    \n"
44233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpckldq %%xmm3,%%xmm2                   \n"
44333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklqdq %%xmm2,%%xmm0                  \n"
44433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%3                         \n"
44533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%2)                     \n"
44633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%2),%2                     \n"
44733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
44833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(src_ptr),       // %0
44933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_stepx_x4),  // %1
45033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_ptr),       // %2
45133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_width),     // %3
45233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_stepx_x12)  // %4
45333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  :
45433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
45533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
45633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1", "xmm2", "xmm3"
45733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
45833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
45933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
46033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
46133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Blends four 2x2 to 4x1.
46233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: dst_ptr 16 byte aligned.
46333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_SSE2(const uint8* src_ptr,
46433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                         ptrdiff_t src_stride, int src_stepx,
46533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                         uint8* dst_ptr, int dst_width) {
46633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  intptr_t src_stepx_x4 = static_cast<intptr_t>(src_stepx);
46733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  intptr_t src_stepx_x12 = 0;
46833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  intptr_t row1 = static_cast<intptr_t>(src_stride);
46933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
47033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x0(,%1,4),%1                   \n"
47133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%1,%1,2),%4                    \n"
47233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%0,%5,1),%5                    \n"
47333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
47433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
47533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movq      (%0),%%xmm0                     \n"
47633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movhps    (%0,%1,1),%%xmm0                \n"
47733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movq      (%0,%1,2),%%xmm1                \n"
47833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movhps    (%0,%4,1),%%xmm1                \n"
47933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%0,%1,4),%0                    \n"
48033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movq      (%5),%%xmm2                     \n"
48133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movhps    (%5,%1,1),%%xmm2                \n"
48233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movq      (%5,%1,2),%%xmm3                \n"
48333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movhps    (%5,%4,1),%%xmm3                \n"
48433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       (%5,%1,4),%5                    \n"
48533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm2,%%xmm0                   \n"
48633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm3,%%xmm1                   \n"
48733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,%%xmm2                   \n"
48833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0x88,%%xmm1,%%xmm0             \n"
48933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
49033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     %%xmm2,%%xmm0                   \n"
49133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%3                         \n"
49233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%2)                     \n"
49333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%2),%2                     \n"
49433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
49533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(src_ptr),        // %0
49633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_stepx_x4),   // %1
49733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_ptr),        // %2
49833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+rm"(dst_width),     // %3
49933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_stepx_x12),  // %4
50033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(row1)            // %5
50133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  :
50233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
50333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
50433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1", "xmm2", "xmm3"
50533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
50633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
50733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
50833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
50933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifndef SSE2_DISABLED
51033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSE2 version
51133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSE2_DISABLED
51233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSE2(uint8* dst_ptr, const uint8* src_ptr,
51333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              ptrdiff_t src_stride, int dst_width,
51433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int source_y_fraction) {
51533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
51633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       %1,%0                           \n"
51733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "cmp       $0x0,%3                         \n"
51833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "je        2f                              \n"
51933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "cmp       $0x80,%3                        \n"
52033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "je        3f                              \n"
52133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      %3,%%xmm5                       \n"
52233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklbw %%xmm5,%%xmm5                   \n"
52333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklwd %%xmm5,%%xmm5                   \n"
52433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
52533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pxor      %%xmm4,%%xmm4                   \n"
52633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
52733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
52833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
52933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1,%4,1),%%xmm2                \n"
53033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,%%xmm1                   \n"
53133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm2,%%xmm3                   \n"
53233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklbw %%xmm4,%%xmm2                   \n"
53333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpckhbw %%xmm4,%%xmm3                   \n"
53433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklbw %%xmm4,%%xmm0                   \n"
53533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpckhbw %%xmm4,%%xmm1                   \n"
53633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "psubw     %%xmm0,%%xmm2                   \n"
53733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "psubw     %%xmm1,%%xmm3                   \n"
53833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pmulhw    %%xmm5,%%xmm2                   \n"
53933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pmulhw    %%xmm5,%%xmm3                   \n"
54033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "paddw     %%xmm2,%%xmm0                   \n"
54133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "paddw     %%xmm3,%%xmm1                   \n"
54233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "packuswb  %%xmm1,%%xmm0                   \n"
54333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
54433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
54533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
54633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
54733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jmp       4f                              \n"
54833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
54933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "2:                                          \n"
55033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
55133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
55233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
55333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
55433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        2b                              \n"
55533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jmp       4f                              \n"
55633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
55733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "3:                                          \n"
55833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
55933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     (%1,%4,1),%%xmm0                \n"
56033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
56133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
56233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
56333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
56433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        3b                              \n"
56533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
56633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "4:                                          \n"
56733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0xff,%%xmm0,%%xmm0             \n"
56833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
56933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(dst_ptr),     // %0
57033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_ptr),     // %1
57133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_width),   // %2
57233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(source_y_fraction)  // %3
57333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "r"(static_cast<intptr_t>(src_stride))  // %4
57433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
57533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
57633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
57733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
57833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
57933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
58033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif  // SSE2_DISABLED
58133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
58233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
58333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_SCALEARGBFILTERROWS_SSSE3
58433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
58533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               ptrdiff_t src_stride, int dst_width,
58633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               int source_y_fraction) {
58733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  asm volatile (
58833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       %1,%0                           \n"
58933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shr       %3                              \n"
59033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "cmp       $0x0,%3                         \n"
59133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "je        2f                              \n"
59233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "cmp       $0x40,%3                        \n"
59333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "je        3f                              \n"
59433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      %3,%%xmm0                       \n"
59533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "neg       %3                              \n"
59633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "add       $0x80,%3                        \n"
59733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movd      %3,%%xmm5                       \n"
59833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklbw %%xmm0,%%xmm5                   \n"
59933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklwd %%xmm5,%%xmm5                   \n"
60033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
60133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
60233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "1:                                          \n"
60333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
60433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1,%4,1),%%xmm2                \n"
60533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,%%xmm1                   \n"
60633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpcklbw %%xmm2,%%xmm0                   \n"
60733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "punpckhbw %%xmm2,%%xmm1                   \n"
60833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pmaddubsw %%xmm5,%%xmm0                   \n"
60933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pmaddubsw %%xmm5,%%xmm1                   \n"
61033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "psrlw     $0x7,%%xmm0                     \n"
61133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "psrlw     $0x7,%%xmm1                     \n"
61233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "packuswb  %%xmm1,%%xmm0                   \n"
61333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
61433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
61533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
61633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        1b                              \n"
61733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jmp       4f                              \n"
61833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
61933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "2:                                          \n"
62033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
62133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
62233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
62333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
62433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        2b                              \n"
62533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jmp       4f                              \n"
62633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
62733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "3:                                          \n"
62833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    (%1),%%xmm0                     \n"
62933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "pavgb     (%1,%4,1),%%xmm0                \n"
63033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "sub       $0x4,%2                         \n"
63133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
63233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "lea       0x10(%1),%1                     \n"
63333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "jg        3b                              \n"
63433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  "4:                                          \n"
63533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ".p2align  4                               \n"
63633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "shufps    $0xff,%%xmm0,%%xmm0             \n"
63733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "movdqa    %%xmm0,(%1,%0,1)                \n"
63833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "+r"(dst_ptr),     // %0
63933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(src_ptr),     // %1
64033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(dst_width),   // %2
64133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    "+r"(source_y_fraction)  // %3
64233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "r"(static_cast<intptr_t>(src_stride))  // %4
64333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  : "memory", "cc"
64433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__SSE2__)
64533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    , "xmm0", "xmm1", "xmm2", "xmm5"
64633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
64733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  );
64833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
64933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif  // defined(__x86_64__) || defined(__i386__)
65033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
65133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2_C(const uint8* src_ptr,
65233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                ptrdiff_t /* src_stride */,
65333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                uint8* dst_ptr, int dst_width) {
65433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  const uint32* src = reinterpret_cast<const uint32*>(src_ptr);
65533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  uint32* dst = reinterpret_cast<uint32*>(dst_ptr);
65633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
65733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int x = 0; x < dst_width - 1; x += 2) {
65833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[0];
65933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[1] = src[2];
66033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src += 4;
66133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst += 2;
66233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
66333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width & 1) {
66433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[0];
66533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
66633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
66733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
66833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDown2Int_C(const uint8* src_ptr, ptrdiff_t src_stride,
66933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                   uint8* dst_ptr, int dst_width) {
67033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int x = 0; x < dst_width; ++x) {
67133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[0] = (src_ptr[0] + src_ptr[4] +
67233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride] + src_ptr[src_stride + 4] + 2) >> 2;
67333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[1] = (src_ptr[1] + src_ptr[5] +
67433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 1] + src_ptr[src_stride + 5] + 2) >> 2;
67533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[2] = (src_ptr[2] + src_ptr[6] +
67633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 2] + src_ptr[src_stride + 6] + 2) >> 2;
67733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[3] = (src_ptr[3] + src_ptr[7] +
67833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 3] + src_ptr[src_stride + 7] + 2) >> 2;
67933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr += 8;
68033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += 4;
68133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
68233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
68333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
68433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBRowDownEven_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
68533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            int src_stepx,
68633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            uint8* dst_ptr, int dst_width) {
68733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  const uint32* src = reinterpret_cast<const uint32*>(src_ptr);
68833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  uint32* dst = reinterpret_cast<uint32*>(dst_ptr);
68933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
69033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int x = 0; x < dst_width - 1; x += 2) {
69133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[0];
69233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[1] = src[src_stepx];
69333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src += src_stepx * 2;
69433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst += 2;
69533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
69633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width & 1) {
69733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[0];
69833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
69933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
70033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
70133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBRowDownEvenInt_C(const uint8* src_ptr,
70233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      ptrdiff_t src_stride,
70333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      int src_stepx,
70433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                      uint8* dst_ptr, int dst_width) {
70533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int x = 0; x < dst_width; ++x) {
70633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[0] = (src_ptr[0] + src_ptr[4] +
70733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride] + src_ptr[src_stride + 4] + 2) >> 2;
70833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[1] = (src_ptr[1] + src_ptr[5] +
70933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 1] + src_ptr[src_stride + 5] + 2) >> 2;
71033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[2] = (src_ptr[2] + src_ptr[6] +
71133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 2] + src_ptr[src_stride + 6] + 2) >> 2;
71233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[3] = (src_ptr[3] + src_ptr[7] +
71333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                  src_ptr[src_stride + 3] + src_ptr[src_stride + 7] + 2) >> 2;
71433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr += src_stepx * 4;
71533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += 4;
71633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
71733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
71833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
// Linear interpolation helpers for packed 32 bit ARGB pixels.
// f is a 16 bit fraction (0..65535): 0 selects a, values near 65536 select b.
// (1-f)a + fb can be replaced with a + f(b-a)

// Blends two single channel values. Arguments and the result are ints.
#define BLENDER1(a, b, f) (static_cast<int>(a) + \
    (((f) * (static_cast<int>(b) - static_cast<int>(a))) >> 16))

// Blends the 8 bit channel found at bit offset s of packed pixels a and b and
// returns it shifted back into position. The channel is converted to uint32
// before the shift so that moving a value >= 128 to bit 24 does not overflow
// a signed int.
#define BLENDERC(a, b, f, s) (static_cast<uint32>( \
    BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f)) << (s))

// Blends all four channels of packed pixels a and b. The expansion is fully
// parenthesized so it can be used safely inside a larger expression.
#define BLENDER(a, b, f) \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
73033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
73133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
73233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                                  int dst_width, int x, int dx) {
73333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  const uint32* src = reinterpret_cast<const uint32*>(src_ptr);
73433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  uint32* dst = reinterpret_cast<uint32*>(dst_ptr);
73533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int j = 0; j < dst_width - 1; j += 2) {
73633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    int xi = x >> 16;
73733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    uint32 a = src[xi];
73833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    uint32 b = src[xi + 1];
73933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = BLENDER(a, b, x & 0xffff);
74033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    x += dx;
74133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    xi = x >> 16;
74233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    a = src[xi];
74333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    b = src[xi + 1];
74433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[1] = BLENDER(a, b, x & 0xffff);
74533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    x += dx;
74633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst += 2;
74733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
74833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width & 1) {
74933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    int xi = x >> 16;
75033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    uint32 a = src[xi];
75133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    uint32 b = src[xi + 1];
75233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = BLENDER(a, b, x & 0xffff);
75333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
75433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
75533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
75633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic const int kMaxInputWidth = 2560;
75733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
75833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// C version 2x2 -> 2x1
75933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ScaleARGBFilterRows_C(uint8* dst_ptr, const uint8* src_ptr,
76033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           ptrdiff_t src_stride,
76133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           int dst_width, int source_y_fraction) {
76233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(dst_width > 0);
76333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int y1_fraction = source_y_fraction;
76433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int y0_fraction = 256 - y1_fraction;
76533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  const uint8* src_ptr1 = src_ptr + src_stride;
76633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  uint8* end = dst_ptr + (dst_width << 2);
76733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  do {
76833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
76933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
77033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8;
77133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8;
77233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[4] = (src_ptr[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8;
77333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[5] = (src_ptr[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8;
77433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[6] = (src_ptr[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8;
77533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr[7] = (src_ptr[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8;
77633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr += 8;
77733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr1 += 8;
77833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += 8;
77933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  } while (dst_ptr < end);
78033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // Duplicate the last pixel (4 bytes) for filtering.
78133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  dst_ptr[0] = dst_ptr[-4];
78233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  dst_ptr[1] = dst_ptr[-3];
78333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  dst_ptr[2] = dst_ptr[-2];
78433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  dst_ptr[3] = dst_ptr[-1];
78533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
78633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
78733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
78833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB, 1/2
78933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *
79033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * This is an optimized version for scaling down a ARGB to 1/2 of
79133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * its original size.
79233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *
79333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
79433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBDown2(int /* src_width */, int /* src_height */,
79533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           int dst_width, int dst_height,
79633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           int src_stride, int dst_stride,
79733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           const uint8* src_ptr, uint8* dst_ptr,
79833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                           FilterMode filtering) {
79933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  void (*ScaleARGBRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
80033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            uint8* dst_ptr, int dst_width) =
80133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      filtering ? ScaleARGBRowDown2Int_C : ScaleARGBRowDown2_C;
80233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
80333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (TestCpuFlag(kCpuHasSSE2) &&
80433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(dst_width, 4) &&
80533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
80633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
80733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Int_SSE2 :
80833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp        ScaleARGBRowDown2_SSE2;
80933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
81033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
81133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
81233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // TODO(fbarchard): Loop through source height to allow odd height.
81333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int y = 0; y < dst_height; ++y) {
81433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
81533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr += (src_stride << 1);
81633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += dst_stride;
81733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
81833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
81933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
82033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
82133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB Even
82233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *
82333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * This is an optimized version for scaling down a ARGB to even
82433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * multiple of its original size.
82533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp *
82633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
82733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBDownEven(int src_width, int src_height,
82833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int dst_width, int dst_height,
82933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int src_stride, int dst_stride,
83033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              const uint8* src_ptr, uint8* dst_ptr,
83133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              FilterMode filtering) {
83233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(IS_ALIGNED(src_width, 2));
83333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(IS_ALIGNED(src_height, 2));
83433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  void (*ScaleARGBRowDownEven)(const uint8* src_ptr, ptrdiff_t src_stride,
83533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                               int src_step, uint8* dst_ptr, int dst_width) =
83633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      filtering ? ScaleARGBRowDownEvenInt_C : ScaleARGBRowDownEven_C;
83733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
83833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (TestCpuFlag(kCpuHasSSE2) &&
83933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(dst_width, 4) &&
84033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
84133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenInt_SSE2 :
84233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp        ScaleARGBRowDownEven_SSE2;
84333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
84433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
84533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int src_step = src_width / dst_width;
84633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // Adjust to point to center of box.
84733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int row_step = src_height / dst_height;
84833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int row_stride = row_step * src_stride;
84933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  src_ptr += ((row_step >> 1) - 1) * src_stride + ((src_step >> 1) - 1) * 4;
85033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int y = 0; y < dst_height; ++y) {
85133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBRowDownEven(src_ptr, src_stride, src_step, dst_ptr, dst_width);
85233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_ptr += row_stride;
85333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += dst_stride;
85433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
85533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
85633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
85733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions, with bilinear
85833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * interpolation.
85933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
86033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
86133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBBilinear(int src_width, int src_height,
86233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int dst_width, int dst_height,
86333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int src_stride, int dst_stride,
86433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              const uint8* src_ptr, uint8* dst_ptr) {
86533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(dst_width > 0);
86633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(dst_height > 0);
86733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  assert(src_width <= kMaxInputWidth);
86833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  SIMD_ALIGNED(uint8 row[kMaxInputWidth * 4 + 16]);
86933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr,
87033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              ptrdiff_t src_stride,
87133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                              int dst_width, int source_y_fraction) =
87233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      ScaleARGBFilterRows_C;
87333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBFILTERROWS_SSE2)
87433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (TestCpuFlag(kCpuHasSSE2) &&
87533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
87633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBFilterRows = ScaleARGBFilterRows_SSE2;
87733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
87833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
87933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_SCALEARGBFILTERROWS_SSSE3)
88033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (TestCpuFlag(kCpuHasSSSE3) &&
88133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src_ptr, 16)) {
88233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3;
88333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
88433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
88533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int dx = (src_width << 16) / dst_width;
88633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int dy = (src_height << 16) / dst_height;
88733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
88833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
88933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int maxy = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
89033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int j = 0; j < dst_height; ++j) {
89133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    int yi = y >> 16;
89233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    int yf = (y >> 8) & 255;
89333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    const uint8* src = src_ptr + yi * src_stride;
89433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBFilterRows(row, src, src_stride, src_width, yf);
89533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBFilterCols_C(dst_ptr, row, dst_width, x, dx);
89633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += dst_stride;
89733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    y += dy;
89833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    if (y > maxy) {
89933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      y = maxy;
90033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    }
90133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
90233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
90333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
90433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Scales a single row of pixels using point sampling.
90533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Code is adapted from libyuv bilinear yuv scaling, but with bilinear
90633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp//     interpolation off, and argb pixels instead of yuv.
90733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBCols(uint8* dst_ptr, const uint8* src_ptr,
90833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                          int dst_width, int x, int dx) {
90933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  const uint32* src = reinterpret_cast<const uint32*>(src_ptr);
91033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  uint32* dst = reinterpret_cast<uint32*>(dst_ptr);
91133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int j = 0; j < dst_width - 1; j += 2) {
91233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[x >> 16];
91333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    x += dx;
91433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[1] = src[x >> 16];
91533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    x += dx;
91633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst += 2;
91733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
91833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width & 1) {
91933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst[0] = src[x >> 16];
92033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
92133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
92233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
92333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
92433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions, without interpolation.
92533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Fixed point math is used for performance: The upper 16 bits
92633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * of x and dx is the integer part of the source position and
92733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * the lower 16 bits are the fixed decimal part.
92833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
92933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
93033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBSimple(int src_width, int src_height,
93133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            int dst_width, int dst_height,
93233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            int src_stride, int dst_stride,
93333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                            const uint8* src_ptr, uint8* dst_ptr) {
93433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int dx = (src_width << 16) / dst_width;
93533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int dy = (src_height << 16) / dst_height;
93633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
93733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
93833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  for (int i = 0; i < dst_height; ++i) {
93933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
94033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    dst_ptr += dst_stride;
94133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    y += dy;
94233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
94333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
94433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
94533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/**
94633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * ScaleARGB ARGB to/from any dimensions.
94733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */
94833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGBAnySize(int src_width, int src_height,
94933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                             int dst_width, int dst_height,
95033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                             int src_stride, int dst_stride,
95133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                             const uint8* src_ptr, uint8* dst_ptr,
95233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                             FilterMode filtering) {
95333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (!filtering || (src_width > kMaxInputWidth)) {
95433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBSimple(src_width, src_height, dst_width, dst_height,
95533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                    src_stride, dst_stride, src_ptr, dst_ptr);
95633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  } else {
95733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBBilinear(src_width, src_height, dst_width, dst_height,
95833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                      src_stride, dst_stride, src_ptr, dst_ptr);
95933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
96033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
96133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
96233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ScaleARGB a ARGB.
96333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp//
96433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// This function in turn calls a scaling function
96533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// suitable for handling the desired resolutions.
96633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
96733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic void ScaleARGB(const uint8* src, int src_stride,
96833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                      int src_width, int src_height,
96933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                      uint8* dst, int dst_stride,
97033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                      int dst_width, int dst_height,
97133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                      FilterMode filtering) {
97233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef CPU_X86
97333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // environment variable overrides for testing.
97433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  char *filter_override = getenv("LIBYUV_FILTER");
97533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (filter_override) {
97633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    filtering = (FilterMode)atoi(filter_override);  // NOLINT
97733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
97833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
97933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width == src_width && dst_height == src_height) {
98033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    // Straight copy.
98133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ARGBCopy(src, src_stride, dst, dst_stride, dst_width, dst_height);
98233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    return;
98333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
98433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (2 * dst_width == src_width && 2 * dst_height == src_height) {
98533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    // Optimized 1/2.
98633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    ScaleARGBDown2(src_width, src_height, dst_width, dst_height,
98733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                   src_stride, dst_stride, src, dst, filtering);
98833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    return;
98933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
99033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int scale_down_x = src_width / dst_width;
99133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  int scale_down_y = src_height / dst_height;
99233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (dst_width * scale_down_x == src_width &&
99333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      dst_height * scale_down_y == src_height) {
99433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    if (!(scale_down_x & 1) && !(scale_down_y & 1)) {
99533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      // Optimized even scale down. ie 4, 6, 8, 10x
99633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      ScaleARGBDownEven(src_width, src_height, dst_width, dst_height,
99733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                        src_stride, dst_stride, src, dst, filtering);
99833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      return;
99933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    }
100033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    if ((scale_down_x & 1) && (scale_down_y & 1)) {
100133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      filtering = kFilterNone;
100233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    }
100333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
100433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // Arbitrary scale up and/or down.
100533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  ScaleARGBAnySize(src_width, src_height, dst_width, dst_height,
100633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp                   src_stride, dst_stride, src, dst, filtering);
100733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
100833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
100933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// ScaleARGB an ARGB image.
101033cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API
101133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampint ARGBScale(const uint8* src_argb, int src_stride_argb,
101233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp             int src_width, int src_height,
101333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp             uint8* dst_argb, int dst_stride_argb,
101433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp             int dst_width, int dst_height,
101533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp             FilterMode filtering) {
101633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (!src_argb || src_width <= 0 || src_height == 0 ||
101733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp      !dst_argb || dst_width <= 0 || dst_height <= 0) {
101833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    return -1;
101933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
102033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  // Negative height means invert the image.
102133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  if (src_height < 0) {
102233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_height = -src_height;
102333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_argb = src_argb + (src_height - 1) * src_stride_argb;
102433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp    src_stride_argb = -src_stride_argb;
102533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  }
102633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
102733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp            dst_argb, dst_stride_argb, dst_width, dst_height,
102833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp            filtering);
102933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp  return 0;
103033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}
103133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp
103233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus
103333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}  // extern "C"
103433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp}  // namespace libyuv
103533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif
1036