/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate.h"

#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(YUV_DISABLE_ASM) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
2533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(__APPLE__) && defined(__i386__) 2633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define DECLARE_FUNCTION(name) \ 2733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".text \n" \ 2833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".private_extern _" #name " \n" \ 2933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".align 4,0x90 \n" \ 3033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp"_" #name ": \n" 3133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) 3233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define DECLARE_FUNCTION(name) \ 3333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".text \n" \ 3433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".align 4,0x90 \n" \ 3533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp"_" #name ": \n" 367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#else 3733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define DECLARE_FUNCTION(name) \ 3833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".text \n" \ 3933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".align 4,0x90 \n" \ 4033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#name ": \n" 417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 4433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) 4533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_MIRRORROW_NEON 4633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width); 4733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define HAS_MIRRORROW_UV_NEON 4833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRowUV_NEON(const 
uint8* src, 497cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, uint8* dst_b, 507cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width); 517cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_WX8_NEON 527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid TransposeWx8_NEON(const uint8* src, int src_stride, 537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, int width); 547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_UVWX8_NEON 557cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid TransposeUVWx8_NEON(const uint8* src, int src_stride, 567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width); 5933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // defined(__ARM_NEON__) 607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 6133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) 627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_WX8_SSSE3 6333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeWx8_SSSE3(const uint8* src, int src_stride, 657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, int width) { 6633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push edi 687cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push esi 697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push ebp 707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov eax, [esp + 12 + 4] // src 717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov edi, [esp + 12 + 8] // src_stride 727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 
mov edx, [esp + 12 + 12] // dst 737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov esi, [esp + 12 + 16] // dst_stride 747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov ecx, [esp + 12 + 20] // width 7533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Read in the data from the source pointer. 777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // First round of bit swap. 7833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 7933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp convertloop: 807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm0, qword ptr [eax] 817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea ebp, [eax + 8] 827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm1, qword ptr [eax + edi] 837cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm0, xmm1 857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm2, qword ptr [eax] 867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm1, xmm0 877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm1, xmm1, 8 887cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm3, qword ptr [eax + edi] 897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 907cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm2, xmm3 917cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm3, xmm2 927cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm4, qword ptr [eax] 937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm3, xmm3, 8 947cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm5, qword ptr [eax + edi] 957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm4, xmm5 967cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm4 
987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm6, qword ptr [eax] 997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm5, xmm5, 8 1007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq xmm7, qword ptr [eax + edi] 1017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm6, xmm7 1027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov eax, ebp 1037cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm6 1047cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm7, xmm7, 8 1057cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Second round of bit swap. 1067cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm0, xmm2 1077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm1, xmm3 1087cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm2, xmm0 1097cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm3, xmm1 1107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm2, xmm2, 8 1117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm3, xmm3, 8 1127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm4, xmm6 1137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm5, xmm7 1147cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, xmm4 1157cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm5 1167cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm6, xmm6, 8 1177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm7, xmm7, 8 1187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Third round of bit swap. 1197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Write to the destination pointer. 
1207cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm0, xmm4 1217cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx], xmm0 1227cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm4, xmm0 1237cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm4, xmm4, 8 1247cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx + esi], xmm4 1257cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 1267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm2, xmm6 1277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, xmm2 1287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm6, xmm6, 8 1297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx], xmm2 1307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm1, xmm5 1317cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx + esi], xmm6 1327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 1337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm1 1347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx], xmm1 1357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm5, xmm5, 8 1367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm3, xmm7 1377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx + esi], xmm5 1387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 1397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx], xmm3 1407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm3 1417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde palignr xmm7, xmm7, 8 14233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 8 1437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movq qword ptr [edx + esi], xmm7 1447cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 
14533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg convertloop 1467cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 1477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop ebp 1487cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop esi 1497cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop edi 1507cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde ret 1517cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 1527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 1537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 1547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_UVWX8_SSE2 15533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp__declspec(naked) __declspec(align(16)) 1567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeUVWx8_SSE2(const uint8* src, int src_stride, 1577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 1587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 1597cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int w) { 16033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp __asm { 1617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push ebx 1627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push esi 1637cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push edi 1647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde push ebp 1657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov eax, [esp + 16 + 4] // src 1667cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov edi, [esp + 16 + 8] // src_stride 1677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov edx, [esp + 16 + 12] // dst_a 1687cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov esi, [esp + 16 + 16] // dst_stride_a 1697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov ebx, [esp + 16 + 20] // dst_b 1707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov ebp, [esp + 16 + 24] // dst_stride_b 1717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov 
ecx, esp 1727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde sub esp, 4 + 16 1737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde and esp, ~15 1747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov [esp + 16], ecx 1757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov ecx, [ecx + 16 + 28] // w 17633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 17733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp align 16 17833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp convertloop: 1797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Read in the data from the source pointer. 1807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // First round of bit swap. 1817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm0, [eax] 1827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm1, [eax + edi] 1837cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 1847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm0 // use xmm7 as temp register. 
1857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm0, xmm1 1867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhbw xmm7, xmm1 1877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm1, xmm7 1887cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm2, [eax] 1897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm3, [eax + edi] 1907cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 1917cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm2 1927cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm2, xmm3 1937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhbw xmm7, xmm3 1947cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm3, xmm7 1957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm4, [eax] 1967cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, [eax + edi] 1977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 1987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm4 1997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm4, xmm5 2007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhbw xmm7, xmm5 2017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm7 2027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, [eax] 2037cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, [eax + edi] 2047cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 2 * edi] 2057cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa [esp], xmm5 // backup xmm5 2067cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde neg edi 2077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm6 // use xmm5 as temp register. 
2087cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklbw xmm6, xmm7 2097cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhbw xmm5, xmm7 2107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm5 2117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea eax, [eax + 8 * edi + 16] 2127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde neg edi 2137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Second round of bit swap. 2147cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm0 2157cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm0, xmm2 2167cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhwd xmm5, xmm2 2177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm2, xmm5 2187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm1 2197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm1, xmm3 2207cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhwd xmm5, xmm3 2217cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm3, xmm5 2227cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, xmm4 2237cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm4, xmm6 2247cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhwd xmm5, xmm6 2257cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, xmm5 2267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm5, [esp] // restore xmm5 2277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa [esp], xmm6 // backup xmm6 2287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, xmm5 // use xmm6 as temp register. 2297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpcklwd xmm5, xmm7 2307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhwd xmm6, xmm7 2317cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm7, xmm6 2327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Third round of bit swap. 
2337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Write to the destination pointer. 2347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, xmm0 2357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm0, xmm4 2367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhdq xmm6, xmm4 2377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm4, xmm6 2387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm6, [esp] // restore xmm6 2397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx], xmm0 2407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx], xmm0 2417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx + esi], xmm4 2427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 2437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx + ebp], xmm4 2447cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea ebx, [ebx + 2 * ebp] 2457cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm0, xmm2 // use xmm0 as the temp register. 2467cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm2, xmm6 2477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx], xmm2 2487cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx], xmm2 2497cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhdq xmm0, xmm6 2507cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx + esi], xmm0 2517cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 2527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx + ebp], xmm0 2537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea ebx, [ebx + 2 * ebp] 2547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm0, xmm1 // use xmm0 as the temp register. 
2557cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm1, xmm5 2567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx], xmm1 2577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx], xmm1 2587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhdq xmm0, xmm5 2597cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx + esi], xmm0 2607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 2617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx + ebp], xmm0 2627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea ebx, [ebx + 2 * ebp] 2637cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movdqa xmm0, xmm3 // use xmm0 as the temp register. 2647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckldq xmm3, xmm7 2657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx], xmm3 2667cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx], xmm3 2677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde punpckhdq xmm0, xmm7 26833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp sub ecx, 8 2697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movlpd qword ptr [edx + esi], xmm0 2707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea edx, [edx + 2 * esi] 2717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde movhpd qword ptr [ebx + ebp], xmm0 2727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde lea ebx, [ebx + 2 * ebp] 27333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp jg convertloop 2747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 2757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde mov esp, [esp + 16] 2767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop ebp 2777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop edi 2787cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop esi 2797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde pop ebx 2807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde ret 
2817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 2827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 28333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif !defined(YUV_DISABLE_ASM) && (defined(__i386__) || defined(__x86_64__)) 2847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_WX8_SSSE3 2857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeWx8_SSSE3(const uint8* src, int src_stride, 2867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, int width) { 28733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 28833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Read in the data from the source pointer. 28933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // First round of bit swap. 29033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 29133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 29233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm0 \n" 29333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0,%3),%%xmm1 \n" 29433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 29533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 29633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm2 \n" 29733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 29833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm1,%%xmm1 \n" 29933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0,%3),%%xmm3 \n" 30033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 30133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm3,%%xmm2 \n" 30233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 30333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm4 \n" 
30433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm3,%%xmm3 \n" 30533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0,%3),%%xmm5 \n" 30633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 30733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm4 \n" 30833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm5 \n" 30933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0),%%xmm6 \n" 31033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm5,%%xmm5 \n" 31133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq (%0,%3),%%xmm7 \n" 31233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 31333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm7,%%xmm6 \n" 31433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 31533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm6,%%xmm7 \n" 31633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x8(%0,%3,8),%0 \n" 31733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 31833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 31933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Second round of bit swap. 
32033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 32133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm3,%%xmm1 \n" 32233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 32333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm3 \n" 32433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm2,%%xmm2 \n" 32533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm3,%%xmm3 \n" 32633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm6,%%xmm4 \n" 32733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm7,%%xmm5 \n" 32833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm6 \n" 32933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,%%xmm7 \n" 33033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm6,%%xmm6 \n" 33133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 33233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Third round of bit swap. 33333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Write to the destination pointer. 
33433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm4,%%xmm0 \n" 33533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 33633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm4 \n" 33733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm4,%%xmm4 \n" 33833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm4,(%1,%4) \n" 33933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 34033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm6,%%xmm2 \n" 34133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 34233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm2,(%1) \n" 34333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm6,%%xmm6 \n" 34433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm5,%%xmm1 \n" 34533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm6,(%1,%4) \n" 34633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 34733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm5 \n" 34833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1) \n" 34933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm5,%%xmm5 \n" 35033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm5,(%1,%4) \n" 35133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 35233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm7,%%xmm3 \n" 35333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm3,(%1) \n" 35433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm7 \n" 35533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 35633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%2 \n" 
35733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm7,(%1,%4) \n" 35833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 35933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 36033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 36133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 36233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %2 36333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(static_cast<intptr_t>(src_stride)), // %3 36433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(static_cast<intptr_t>(dst_stride)) // %4 36533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc" 36633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp #if defined(__SSE2__) 36733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 36833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp #endif 36933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 3707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 3717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 37233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined (__i386__) 3737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_UVWX8_SSE2 3747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordeextern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, 3757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 3767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 3777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int w); 37833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm ( 37933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp DECLARE_FUNCTION(TransposeUVWx8_SSE2) 38033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "push %ebx 
\n" 38133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "push %esi \n" 38233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "push %edi \n" 38333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "push %ebp \n" 38433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x14(%esp),%eax \n" 38533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x18(%esp),%edi \n" 38633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x1c(%esp),%edx \n" 38733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x20(%esp),%esi \n" 38833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x24(%esp),%ebx \n" 38933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x28(%esp),%ebp \n" 39033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov %esp,%ecx \n" 39133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x14,%esp \n" 39233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "and $0xfffffff0,%esp \n" 39333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov %ecx,0x10(%esp) \n" 39433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x2c(%ecx),%ecx \n" 3957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 39633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp"1: \n" 39733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax),%xmm0 \n" 39833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax,%edi,1),%xmm1 \n" 39933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%eax,%edi,2),%eax \n" 40033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm0,%xmm7 \n" 40133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %xmm1,%xmm0 \n" 40233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %xmm1,%xmm7 \n" 40333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm7,%xmm1 \n" 40433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax),%xmm2 \n" 
40533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax,%edi,1),%xmm3 \n" 40633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%eax,%edi,2),%eax \n" 40733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm2,%xmm7 \n" 40833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %xmm3,%xmm2 \n" 40933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %xmm3,%xmm7 \n" 41033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm7,%xmm3 \n" 41133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax),%xmm4 \n" 41233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax,%edi,1),%xmm5 \n" 41333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%eax,%edi,2),%eax \n" 41433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm4,%xmm7 \n" 41533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %xmm5,%xmm4 \n" 41633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %xmm5,%xmm7 \n" 41733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm7,%xmm5 \n" 41833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax),%xmm6 \n" 41933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%eax,%edi,1),%xmm7 \n" 42033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%eax,%edi,2),%eax \n" 42133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,(%esp) \n" 42233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %edi \n" 42333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm6,%xmm5 \n" 42433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %xmm7,%xmm6 \n" 42533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %xmm7,%xmm5 \n" 42633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,%xmm7 \n" 42733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%eax,%edi,8),%eax \n" 
42833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %edi \n" 42933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm0,%xmm5 \n" 43033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %xmm2,%xmm0 \n" 43133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %xmm2,%xmm5 \n" 43233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,%xmm2 \n" 43333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm1,%xmm5 \n" 43433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %xmm3,%xmm1 \n" 43533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %xmm3,%xmm5 \n" 43633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,%xmm3 \n" 43733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm4,%xmm5 \n" 43833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %xmm6,%xmm4 \n" 43933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %xmm6,%xmm5 \n" 44033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,%xmm6 \n" 44133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%esp),%xmm5 \n" 44233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm6,(%esp) \n" 44333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm5,%xmm6 \n" 44433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %xmm7,%xmm5 \n" 44533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %xmm7,%xmm6 \n" 44633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm6,%xmm7 \n" 44733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm0,%xmm6 \n" 44833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %xmm4,%xmm0 \n" 44933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %xmm4,%xmm6 \n" 45033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm6,%xmm4 \n" 
45133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%esp),%xmm6 \n" 45233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm0,(%edx) \n" 45333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm0,(%ebx) \n" 45433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm4,(%edx,%esi,1) \n" 45533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%edx,%esi,2),%edx \n" 45633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm4,(%ebx,%ebp,1) \n" 45733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%ebx,%ebp,2),%ebx \n" 45833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm2,%xmm0 \n" 45933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %xmm6,%xmm2 \n" 46033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm2,(%edx) \n" 46133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm2,(%ebx) \n" 46233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %xmm6,%xmm0 \n" 46333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm0,(%edx,%esi,1) \n" 46433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%edx,%esi,2),%edx \n" 46533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm0,(%ebx,%ebp,1) \n" 46633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%ebx,%ebp,2),%ebx \n" 46733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm1,%xmm0 \n" 46833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %xmm5,%xmm1 \n" 46933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm1,(%edx) \n" 47033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm1,(%ebx) \n" 47133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %xmm5,%xmm0 \n" 47233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm0,(%edx,%esi,1) \n" 47333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 
(%edx,%esi,2),%edx \n" 47433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm0,(%ebx,%ebp,1) \n" 47533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%ebx,%ebp,2),%ebx \n" 47633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %xmm3,%xmm0 \n" 47733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %xmm7,%xmm3 \n" 47833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm3,(%edx) \n" 47933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm3,(%ebx) \n" 48033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %xmm7,%xmm0 \n" 48133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%ecx \n" 48233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %xmm0,(%edx,%esi,1) \n" 48333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%edx,%esi,2),%edx \n" 48433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %xmm0,(%ebx,%ebp,1) \n" 48533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%ebx,%ebp,2),%ebx \n" 48633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 48733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov 0x10(%esp),%esp \n" 48833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pop %ebp \n" 48933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pop %edi \n" 49033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pop %esi \n" 49133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "pop %ebx \n" 49233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "ret \n" 4937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde); 49433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif !defined(YUV_DISABLE_ASM) && defined(__x86_64__) 4957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde// 64 bit version has enough registers to do 16x8 to 8x16 at a time. 
4967cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_WX8_FAST_SSSE3 4977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, 4987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, int width) { 49933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 5007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Read in the data from the source pointer. 5017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // First round of bit swap. 50233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 50333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp"1: \n" 50433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 50533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%3),%%xmm1 \n" 50633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 50733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm8 \n" 50833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 50933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm1,%%xmm8 \n" 51033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm2 \n" 51133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm1 \n" 51233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm9 \n" 51333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm1,%%xmm1 \n" 51433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm9,%%xmm9 \n" 51533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%3),%%xmm3 \n" 51633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 51733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm10 \n" 51833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw 
%%xmm3,%%xmm2 \n" 51933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm3,%%xmm10 \n" 52033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm3 \n" 52133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm10,%%xmm11 \n" 52233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm4 \n" 52333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm3,%%xmm3 \n" 52433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm11,%%xmm11 \n" 52533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%3),%%xmm5 \n" 52633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 52733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm12 \n" 52833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm4 \n" 52933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm5,%%xmm12 \n" 53033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm5 \n" 53133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm12,%%xmm13 \n" 53233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm6 \n" 53333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm5,%%xmm5 \n" 53433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm13,%%xmm13 \n" 53533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%3),%%xmm7 \n" 53633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%3,2),%0 \n" 53733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm6,%%xmm14 \n" 53833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm7,%%xmm6 \n" 53933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm7,%%xmm14 \n" 54033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 54133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"movdqa %%xmm6,%%xmm7 \n" 54233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm14,%%xmm15 \n" 54333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0,%3,8),%0 \n" 54433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 54533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm15,%%xmm15 \n" 54633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %3 \n" 5477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Second round of bit swap. 54833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 54933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm3,%%xmm1 \n" 55033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm2 \n" 55133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm3 \n" 55233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm2,%%xmm2 \n" 55333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm3,%%xmm3 \n" 55433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm6,%%xmm4 \n" 55533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm7,%%xmm5 \n" 55633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm6 \n" 55733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,%%xmm7 \n" 55833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm6,%%xmm6 \n" 55933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 56033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm10,%%xmm8 \n" 56133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm11,%%xmm9 \n" 56233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm10 \n" 56333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm9,%%xmm11 \n" 
56433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm10,%%xmm10 \n" 56533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm11,%%xmm11 \n" 56633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm14,%%xmm12 \n" 56733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm15,%%xmm13 \n" 56833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm12,%%xmm14 \n" 56933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm13,%%xmm15 \n" 57033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm14,%%xmm14 \n" 57133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm15,%%xmm15 \n" 5727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Third round of bit swap. 5737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Write to the destination pointer. 57433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm4,%%xmm0 \n" 57533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm0,(%1) \n" 57633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm4 \n" 57733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm4,%%xmm4 \n" 57833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm4,(%1,%4) \n" 57933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 58033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm6,%%xmm2 \n" 58133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm6 \n" 58233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm2,(%1) \n" 58333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm6,%%xmm6 \n" 58433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm5,%%xmm1 \n" 58533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm6,(%1,%4) \n" 58633cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "lea (%1,%4,2),%1 \n" 58733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm5 \n" 58833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm1,(%1) \n" 58933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm5,%%xmm5 \n" 59033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm5,(%1,%4) \n" 59133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 59233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm7,%%xmm3 \n" 59333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm3,(%1) \n" 59433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm7 \n" 59533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm7,%%xmm7 \n" 59633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm7,(%1,%4) \n" 59733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 59833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm12,%%xmm8 \n" 59933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm8,(%1) \n" 60033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm12 \n" 60133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm12,%%xmm12 \n" 60233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm12,(%1,%4) \n" 60333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 60433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm14,%%xmm10 \n" 60533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm10,%%xmm14 \n" 60633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm10,(%1) \n" 60733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm14,%%xmm14 \n" 60833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm13,%%xmm9 \n" 60933cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "movq %%xmm14,(%1,%4) \n" 61033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 61133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm9,%%xmm13 \n" 61233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm9,(%1) \n" 61333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm13,%%xmm13 \n" 61433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm13,(%1,%4) \n" 61533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 61633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm15,%%xmm11 \n" 61733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm11,(%1) \n" 61833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm11,%%xmm15 \n" 61933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "palignr $0x8,%%xmm15,%%xmm15 \n" 62033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x10,%2 \n" 62133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movq %%xmm15,(%1,%4) \n" 62233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%4,2),%1 \n" 62333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 6247cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "+r"(src), // %0 6257cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(dst), // %1 6267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(width) // %2 6277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "r"(static_cast<intptr_t>(src_stride)), // %3 6287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "r"(static_cast<intptr_t>(dst_stride)) // %4 62933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", 63033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 63133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" 
6327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde); 6337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 6347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 6357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#define HAS_TRANSPOSE_UVWX8_SSE2 6367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeUVWx8_SSE2(const uint8* src, int src_stride, 6377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 6387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 6397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int w) { 64033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 6417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Read in the data from the source pointer. 6427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // First round of bit swap. 64333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 4 \n" 64433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp"1: \n" 64533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm0 \n" 64633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4),%%xmm1 \n" 64733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%4,2),%0 \n" 64833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm8 \n" 64933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm1,%%xmm0 \n" 65033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm1,%%xmm8 \n" 65133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm1 \n" 65233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm2 \n" 65333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4),%%xmm3 \n" 65433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%4,2),%0 \n" 65533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm8 \n" 65633cfdeb7b267ab635413797fffb046b73272f7ecHendrik 
Dahlkamp "punpcklbw %%xmm3,%%xmm2 \n" 65733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm3,%%xmm8 \n" 65833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm3 \n" 65933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm4 \n" 66033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4),%%xmm5 \n" 66133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%4,2),%0 \n" 66233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm8 \n" 66333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm5,%%xmm4 \n" 66433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm5,%%xmm8 \n" 66533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm5 \n" 66633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0),%%xmm6 \n" 66733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa (%0,%4),%%xmm7 \n" 66833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%0,%4,2),%0 \n" 66933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm6,%%xmm8 \n" 67033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklbw %%xmm7,%%xmm6 \n" 67133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %4 \n" 67233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea 0x10(%0,%4,8),%0 \n" 67333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhbw %%xmm7,%%xmm8 \n" 67433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm7 \n" 67533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "neg %4 \n" 6767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Second round of bit swap. 
67733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm8 \n" 67833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm9 \n" 67933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm2,%%xmm8 \n" 68033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm3,%%xmm9 \n" 68133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm2,%%xmm0 \n" 68233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm3,%%xmm1 \n" 68333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm2 \n" 68433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm9,%%xmm3 \n" 68533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm4,%%xmm8 \n" 68633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm5,%%xmm9 \n" 68733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm6,%%xmm8 \n" 68833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhwd %%xmm7,%%xmm9 \n" 68933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm6,%%xmm4 \n" 69033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpcklwd %%xmm7,%%xmm5 \n" 69133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm8,%%xmm6 \n" 69233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm9,%%xmm7 \n" 6937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Third round of bit swap. 6947cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Write to the destination pointer. 
69533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm0,%%xmm8 \n" 69633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm4,%%xmm0 \n" 69733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm0,(%1) \n" // Write back U channel 69833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm0,(%2) \n" // Write back V channel 69933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %%xmm4,%%xmm8 \n" 70033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm8,(%1,%5) \n" 70133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%5,2),%1 \n" 70233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm8,(%2,%6) \n" 70333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%2,%6,2),%2 \n" 70433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm2,%%xmm8 \n" 70533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm6,%%xmm2 \n" 70633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm2,(%1) \n" 70733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm2,(%2) \n" 70833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %%xmm6,%%xmm8 \n" 70933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm8,(%1,%5) \n" 71033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%5,2),%1 \n" 71133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm8,(%2,%6) \n" 71233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%2,%6,2),%2 \n" 71333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm1,%%xmm8 \n" 71433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm5,%%xmm1 \n" 71533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm1,(%1) \n" 71633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm1,(%2) \n" 71733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
"punpckhdq %%xmm5,%%xmm8 \n" 71833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm8,(%1,%5) \n" 71933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%5,2),%1 \n" 72033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm8,(%2,%6) \n" 72133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%2,%6,2),%2 \n" 72233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movdqa %%xmm3,%%xmm8 \n" 72333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckldq %%xmm7,%%xmm3 \n" 72433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm3,(%1) \n" 72533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm3,(%2) \n" 72633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "punpckhdq %%xmm7,%%xmm8 \n" 72733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub $0x8,%3 \n" 72833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movlpd %%xmm8,(%1,%5) \n" 72933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%1,%5,2),%1 \n" 73033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "movhpd %%xmm8,(%2,%6) \n" 73133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lea (%2,%6,2),%2 \n" 73233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "jg 1b \n" 7337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "+r"(src), // %0 7347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(dst_a), // %1 7357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(dst_b), // %2 7367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "+r"(w) // %3 7377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde : "r"(static_cast<intptr_t>(src_stride)), // %4 7387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "r"(static_cast<intptr_t>(dst_stride_a)), // %5 7397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde "r"(static_cast<intptr_t>(dst_stride_b)) // %6 74033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", 
74133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 74233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "xmm8", "xmm9" 7437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde); 7447cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 7457cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 7467cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 7477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 7487cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeWx8_C(const uint8* src, int src_stride, 7497cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, 75033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 75133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < width; ++i) { 7527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[0] = src[0 * src_stride]; 7537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[1] = src[1 * src_stride]; 7547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[2] = src[2 * src_stride]; 7557cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[3] = src[3 * src_stride]; 7567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[4] = src[4 * src_stride]; 7577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[5] = src[5 * src_stride]; 7587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[6] = src[6 * src_stride]; 7597cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[7] = src[7 * src_stride]; 7607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde ++src; 7617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst += dst_stride; 7627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 7637cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 7647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 7657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeWxH_C(const uint8* src, int src_stride, 7667cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, 
int dst_stride, 7677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 76833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < width; ++i) { 76933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int j = 0; j < height; ++j) { 7707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst[i * dst_stride + j] = src[j * src_stride + i]; 77133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 77233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 7737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 7747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 77533cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 7767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid TransposePlane(const uint8* src, int src_stride, 7777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, 7787cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 77933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*TransposeWx8)(const uint8* src, int src_stride, 78033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst, int dst_stride, 78133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) = TransposeWx8_C; 7827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#if defined(HAS_TRANSPOSE_WX8_NEON) 78333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasNEON)) { 7847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposeWx8 = TransposeWx8_NEON; 78533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 7867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 7877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#if defined(HAS_TRANSPOSE_WX8_SSSE3) 78833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { 7897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposeWx8 = TransposeWx8_SSSE3; 79033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 
} 7917cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 79233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) 79333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSSE3) && 79433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(width, 16) && 79533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { 79633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeWx8 = TransposeWx8_FAST_SSSE3; 7977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 79833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 7997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 80033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Work across the source in 8x8 tiles 80133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int i = height; 8027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde while (i >= 8) { 8037cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposeWx8(src, src_stride, dst, dst_stride, width); 80433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += 8 * src_stride; // Go down 8 rows. 80533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += 8; // Move over 8 columns. 
80633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp i -= 8; 8077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 8087cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 80933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); 8107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 8117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 81233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 8137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotatePlane90(const uint8* src, int src_stride, 8147cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, 8157cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 8167cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Rotate by 90 is a transpose with the source read 81733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // from bottom to top. So set the source pointer to the end 8187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // of the buffer and flip the sign of the source stride. 
8197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src += src_stride * (height - 1); 8207cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride = -src_stride; 8217cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposePlane(src, src_stride, dst, dst_stride, width, height); 8227cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 8237cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 82433cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 8257cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotatePlane270(const uint8* src, int src_stride, 8267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, 8277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 8287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Rotate by 270 is a transpose with the destination written 82933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // from bottom to top. So set the destination pointer to the end 8307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // of the buffer and flip the sign of the destination stride. 
8317cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst += dst_stride * (width - 1); 8327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_stride = -dst_stride; 8337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposePlane(src, src_stride, dst, dst_stride, width, height); 8347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 8357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 83633cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 8377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotatePlane180(const uint8* src, int src_stride, 8387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst, int dst_stride, 8397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 84033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; 84133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_MIRRORROW_NEON) 84233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasNEON)) { 84333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRow = MirrorRow_NEON; 84433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 8457cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 84633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_MIRRORROW_SSE2) 84733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && 84833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(width, 16) && 84933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && 85033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { 85133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRow = MirrorRow_SSE2; 85233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 8537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#endif 
85433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_MIRRORROW_SSSE3) 85533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSSE3) && 85633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(width, 16) && 85733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && 85833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { 85933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRow = MirrorRow_SSSE3; 8607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 86133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 86233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; 86333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_COPYROW_NEON) 86433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { 86533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp CopyRow = CopyRow_NEON; 86633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 86733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 86833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_COPYROW_X86) 86933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 87033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp CopyRow = CopyRow_X86; 87133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 87233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 87333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_COPYROW_SSE2) 87433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && 87533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && 
87633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { 87733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp CopyRow = CopyRow_SSE2; 87833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 87933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 88033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (width > kMaxStride) { 88133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return; 88233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 88333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Swap first and last row and mirror the content. Uses a temporary row. 88433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp SIMD_ALIGNED(uint8 row[kMaxStride]); 88533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* src_bot = src + src_stride * (height - 1); 88633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_bot = dst + dst_stride * (height - 1); 88733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int half_height = (height + 1) >> 1; 88833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Odd height will harmlessly mirror the middle row twice. 
88933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int y = 0; y < half_height; ++y) { 89033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRow(src, row, width); // Mirror first row into a buffer 89133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += src_stride; 89233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRow(src_bot, dst, width); // Mirror last row into first row 8937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst += dst_stride; 89433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp CopyRow(row, dst_bot, width); // Copy first mirrored row into last 89533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_bot -= src_stride; 89633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_bot -= dst_stride; 8977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 8987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 8997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 9007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeUVWx8_C(const uint8* src, int src_stride, 9017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 9027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 90333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 90433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < width; ++i) { 9057cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[0] = src[0 * src_stride + 0]; 9067cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[0] = src[0 * src_stride + 1]; 9077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[1] = src[1 * src_stride + 0]; 9087cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[1] = src[1 * src_stride + 1]; 9097cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[2] = src[2 * src_stride + 0]; 9107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[2] = src[2 * src_stride + 1]; 
9117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[3] = src[3 * src_stride + 0]; 9127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[3] = src[3 * src_stride + 1]; 9137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[4] = src[4 * src_stride + 0]; 9147cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[4] = src[4 * src_stride + 1]; 9157cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[5] = src[5 * src_stride + 0]; 9167cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[5] = src[5 * src_stride + 1]; 9177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[6] = src[6 * src_stride + 0]; 9187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[6] = src[6 * src_stride + 1]; 9197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[7] = src[7 * src_stride + 0]; 9207cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[7] = src[7 * src_stride + 1]; 9217cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src += 2; 9227cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a += dst_stride_a; 9237cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b += dst_stride_b; 9247cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 9257cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 9267cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 9277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordestatic void TransposeUVWxH_C(const uint8* src, int src_stride, 9287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 9297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 93033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width, int height) { 93133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < width * 2; i += 2) 93233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int j = 0; j < height; ++j) { 9337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; 
9347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; 9357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 9367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 9377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 93833cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 9397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid TransposeUV(const uint8* src, int src_stride, 9407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 9417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 9427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 94333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*TransposeUVWx8)(const uint8* src, int src_stride, 94433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_a, int dst_stride_a, 94533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_b, int dst_stride_b, 94633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) = TransposeUVWx8_C; 9477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde#if defined(HAS_TRANSPOSE_UVWX8_NEON) 94833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasNEON)) { 94933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeUVWx8 = TransposeUVWx8_NEON; 95033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 95133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif defined(HAS_TRANSPOSE_UVWX8_SSE2) 95233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSE2) && 95333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(width, 8) && 95433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { 95533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeUVWx8 = TransposeUVWx8_SSE2; 9567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 
95733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 9587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 95933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // Work through the source in 8x8 tiles. 96033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int i = height; 9617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde while (i >= 8) { 96233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeUVWx8(src, src_stride, 96333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_a, dst_stride_a, 96433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_b, dst_stride_b, 96533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp width); 96633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += 8 * src_stride; // Go down 8 rows. 96733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_a += 8; // Move over 8 columns. 96833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_b += 8; // Move over 8 columns. 96933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp i -= 8; 9707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 9717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 97233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp TransposeUVWxH_C(src, src_stride, 97333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_a, dst_stride_a, 97433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_b, dst_stride_b, 97533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp width, i); 9767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 9777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 97833cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 9797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotateUV90(const uint8* src, int src_stride, 9807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 9817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 9827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 
int width, int height) { 9837cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src += src_stride * (height - 1); 9847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride = -src_stride; 9857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 9867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposeUV(src, src_stride, 9877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a, dst_stride_a, 9887cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b, dst_stride_b, 9897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 9907cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 9917cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 99233cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 9937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotateUV270(const uint8* src, int src_stride, 9947cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 9957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 9967cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 9977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a += dst_stride_a * (width - 1); 9987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b += dst_stride_b * (width - 1); 9997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_stride_a = -dst_stride_a; 10007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_stride_b = -dst_stride_b; 10017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 10027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde TransposeUV(src, src_stride, 10037cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a, dst_stride_a, 10047cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b, dst_stride_b, 10057cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 10067cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 10077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 100833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Rotate 180 is a horizontal 
and vertical flip. 100933cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 10107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordevoid RotateUV180(const uint8* src, int src_stride, 10117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_a, int dst_stride_a, 10127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_b, int dst_stride_b, 10137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height) { 101433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = 101533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRowUV_C; 101633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_MIRRORROW_UV_NEON) 101733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasNEON)) { 101833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRowUV = MirrorRowUV_NEON; 10197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 102033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#elif defined(HAS_MIRRORROW_UV_SSSE3) 102133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (TestCpuFlag(kCpuHasSSSE3) && 102233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(width, 16) && 102333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { 102433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRowUV = MirrorRowUV_SSSE3; 102533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 102633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 10277cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 10287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_a += dst_stride_a * (height - 1); 10297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_b += dst_stride_b * (height - 1); 10307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 103133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int i = 0; i < 
height; ++i) { 103233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp MirrorRowUV(src, dst_a, dst_b, width); 103333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src += src_stride; 103433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_a -= dst_stride_a; 103533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst_b -= dst_stride_b; 10367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 10377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 10387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 103933cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 10407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordeint I420Rotate(const uint8* src_y, int src_stride_y, 10417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde const uint8* src_u, int src_stride_u, 10427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde const uint8* src_v, int src_stride_v, 10437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_y, int dst_stride_y, 10447cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_u, int dst_stride_u, 10457cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_v, int dst_stride_v, 10467cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height, 10477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotationMode mode) { 104833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || 104933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp !dst_y || !dst_u || !dst_v) { 105033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return -1; 105133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 10527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int halfwidth = (width + 1) >> 1; 10537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int halfheight = (height + 1) >> 1; 10547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 10557cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // Negative height means invert the image. 
10567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde if (height < 0) { 10577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde height = -height; 10587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfheight = (height + 1) >> 1; 10597cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_y = src_y + (height - 1) * src_stride_y; 10607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_u = src_u + (halfheight - 1) * src_stride_u; 10617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_v = src_v + (halfheight - 1) * src_stride_v; 10627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride_y = -src_stride_y; 10637cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride_u = -src_stride_u; 10647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride_v = -src_stride_v; 10657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 10667cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 10677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde switch (mode) { 10687cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate0: 10697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // copy frame 10707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return I420Copy(src_y, src_stride_y, 10717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_u, src_stride_u, 10727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_v, src_stride_v, 10737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 10747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 10757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 10767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 10777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate90: 10787cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane90(src_y, src_stride_y, 10797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 10807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 
10817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane90(src_u, src_stride_u, 10827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 10837cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 10847cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane90(src_v, src_stride_v, 10857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 10867cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 10877cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 10887cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate270: 10897cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane270(src_y, src_stride_y, 10907cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 10917cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 10927cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane270(src_u, src_stride_u, 10937cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 10947cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 10957cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane270(src_v, src_stride_v, 10967cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 10977cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 10987cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 10997cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate180: 11007cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane180(src_y, src_stride_y, 11017cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 11027cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 11037cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane180(src_u, src_stride_u, 11047cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 11057cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 
11067cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane180(src_v, src_stride_v, 11077cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 11087cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 11097cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 11107cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde default: 11117cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde break; 11127cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 11137cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return -1; 11147cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 11157cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 111633cfdeb7b267ab635413797fffb046b73272f7ecHendrik DahlkampLIBYUV_API 11177cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Bordeint NV12ToI420Rotate(const uint8* src_y, int src_stride_y, 11187cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde const uint8* src_uv, int src_stride_uv, 11197cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_y, int dst_stride_y, 11207cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_u, int dst_stride_u, 11217cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde uint8* dst_v, int dst_stride_v, 11227cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int width, int height, 11237cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotationMode mode) { 112433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp if (!src_y || !src_uv || width <= 0 || height == 0 || 112533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp !dst_y || !dst_u || !dst_v) { 112633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return -1; 112733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 11287cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int halfwidth = (width + 1) >> 1; 11297cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde int halfheight = (height + 1) >> 1; 11307cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 11317cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // 
Negative height means invert the image. 11327cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde if (height < 0) { 11337cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde height = -height; 11347cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfheight = (height + 1) >> 1; 11357cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_y = src_y + (height - 1) * src_stride_y; 11367cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_uv = src_uv + (halfheight - 1) * src_stride_uv; 11377cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride_y = -src_stride_y; 11387cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde src_stride_uv = -src_stride_uv; 11397cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 11407cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 11417cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde switch (mode) { 11427cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate0: 11437cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde // copy frame 114433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp return NV12ToI420(src_y, src_stride_y, 114533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp src_uv, src_stride_uv, 11467cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 11477cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 11487cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 11497cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 11507cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate90: 11517cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane90(src_y, src_stride_y, 11527cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 11537cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 11547cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotateUV90(src_uv, src_stride_uv, 11557cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 11567cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, 
dst_stride_v, 11577cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 11587cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 11597cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate270: 11607cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane270(src_y, src_stride_y, 11617cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 11627cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 11637cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotateUV270(src_uv, src_stride_uv, 11647cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 11657cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 11667cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 11677cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 11687cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde case kRotate180: 11697cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotatePlane180(src_y, src_stride_y, 11707cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_y, dst_stride_y, 11717cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde width, height); 11727cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde RotateUV180(src_uv, src_stride_uv, 11737cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_u, dst_stride_u, 11747cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde dst_v, dst_stride_v, 11757cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde halfwidth, halfheight); 11767cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return 0; 11777cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde default: 11787cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde break; 11797cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde } 11807cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde return -1; 11817cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} 11827cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde 118333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 
118433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // extern "C" 11857cd8149e2cbad8b1ff6d481c37a4775d3c8cf2faShri Borde} // namespace libyuv 118633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 1187