1f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang/* 2f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Copyright 2013 The LibYuv Project Authors. All rights reserved. 3f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * 4f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Use of this source code is governed by a BSD-style license 5f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * that can be found in the LICENSE file in the root of the source 6f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * tree. An additional intellectual property rights grant can be found 7f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * in the file PATENTS. All contributing project authors may 8f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * be found in the AUTHORS file in the root of the source tree. 9f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang */ 10f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 11f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/rotate_row.h" 12cead1e07666bcc5914f8927712c2f89b9b789f9bFrank Barchard#include "libyuv/row.h" 13f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 14f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 15f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangnamespace libyuv { 16f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangextern "C" { 17f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 18f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 19f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// This module is for 32 bit Visual C x86 and clangcl 20f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 21f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 22b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard__declspec(naked) void TransposeWx8_SSSE3(const uint8* src, 23b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride, 24b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst, 25b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride, 26b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 27f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm { 28f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push edi 29f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push esi 30f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebp 31b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov eax, [esp + 12 + 4] // src 32b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov edi, [esp + 12 + 8] // src_stride 33f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov edx, [esp + 12 + 12] // dst 34f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esi, [esp + 12 + 16] // dst_stride 35f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, [esp + 12 + 20] // width 36f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 37f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Read in the data from the source pointer. 38f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // First round of bit swap. 39f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang align 4 40f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang convertloop: 41f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm0, qword ptr [eax] 42f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebp, [eax + 8] 43f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm1, qword ptr [eax + edi] 44f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 45f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm0, xmm1 46f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm2, qword ptr [eax] 47f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm1, xmm0 48f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm1, xmm1, 8 49f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm3, qword ptr [eax + edi] 50f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 51f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm2, xmm3 52f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm2 53f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm4, qword ptr [eax] 54f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm3, xmm3, 8 55f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm5, qword ptr [eax + edi] 56f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm4, xmm5 57f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 58f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm4 59f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm6, qword ptr [eax] 60f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm5, xmm5, 8 61f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm7, qword ptr [eax + edi] 62f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm6, xmm7 63f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov eax, ebp 64f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm6 65f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 66f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Second round of bit swap. 67f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm0, xmm2 68f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm1, xmm3 69f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm2, xmm0 70f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm1 71f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm2, xmm2, 8 72f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm3, xmm3, 8 73f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm4, xmm6 74f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm5, xmm7 75f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm4 76f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm5 77f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm6, xmm6, 8 78f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 79f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Third round of bit swap. 80f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Write to the destination pointer. 81f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm0, xmm4 82f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm0 83f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm4, xmm0 84f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm4, xmm4, 8 85f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm4 86f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 87f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm2, xmm6 88f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm2 89f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm6, xmm6, 8 90f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm2 91f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm1, xmm5 92f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm6 93f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 94f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm1 95f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm1 96f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm5, xmm5, 8 97f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm3, xmm7 98f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm5 99f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 100f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm3 101f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm3 102f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 103f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub ecx, 8 104f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm7 105f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 106f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang jg convertloop 107f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 108f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebp 109f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop esi 110f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop edi 111f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ret 112f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang } 113f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 114f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 115b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard__declspec(naked) void TransposeUVWx8_SSE2(const uint8* src, 116b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride, 117b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_a, 118b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride_a, 119b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_b, 120b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride_b, 121b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int w) { 122f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm { 123f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebx 124f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push esi 125f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push edi 126f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebp 127b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov eax, [esp + 16 + 4] // src 128b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov edi, [esp + 16 + 8] // src_stride 129f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov edx, [esp + 16 + 12] // dst_a 130f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esi, [esp + 16 + 16] // dst_stride_a 131f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ebx, [esp + 16 + 20] // dst_b 132f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ebp, [esp + 16 + 24] // dst_stride_b 133f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, esp 134f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub esp, 4 + 16 135f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang and esp, ~15 136f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov [esp + 16], ecx 137f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, [ecx + 16 + 28] // w 138f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 139f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang align 4 140b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard // Read in the data from the source pointer. 141b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard // First round of bit swap. 142b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard convertloop: 143f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm0, [eax] 144f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm1, [eax + edi] 145f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 146f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm0 // use xmm7 as temp register. 147f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm0, xmm1 148f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm1 149f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm1, xmm7 150f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm2, [eax] 151f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm3, [eax + edi] 152f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 153f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm2 154f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm2, xmm3 155f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm3 156f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm7 157f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm4, [eax] 158f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm5, [eax + edi] 159f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 160f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm4 161f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm4, xmm5 162f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm5 163f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm7 164f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm6, [eax] 165f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm7, [eax + edi] 166f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 167f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu [esp], xmm5 // backup xmm5 168f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang neg edi 169b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard movdqa xmm5, xmm6 // use xmm5 as temp register. 170f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm6, xmm7 171f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm5, xmm7 172f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm5 173f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 8 * edi + 16] 174f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang neg edi 175f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Second round of bit swap. 176f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm0 177f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm0, xmm2 178f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm2 179f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm2, xmm5 180f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm1 181f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm1, xmm3 182f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm3 183f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm5 184f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm4 185f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm4, xmm6 186f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm6 187f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm5 188f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm5, [esp] // restore xmm5 189f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu [esp], xmm6 // backup xmm6 190b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard movdqa xmm6, xmm5 // use xmm6 as temp register. 191f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm5, xmm7 192f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm6, xmm7 193f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm6 194b33a82ffd059f362574ae038458e8dee26ac5a4aFrank Barchard 195f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Third round of bit swap. 196f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Write to the destination pointer. 197f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm0 198f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm0, xmm4 199f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm6, xmm4 200f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm4, xmm6 201f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm6, [esp] // restore xmm6 202f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm0 203f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm0 204f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm4 205f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 206f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm4 207f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 208b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm2 // use xmm0 as the temp register. 209f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm2, xmm6 210f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm2 211f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm2 212f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm6 213f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 214f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 215f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 216f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 217b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm1 // use xmm0 as the temp register. 218f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm1, xmm5 219f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm1 220f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm1 221f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm5 222f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 223f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 224f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 225f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 226b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm3 // use xmm0 as the temp register. 227f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm3, xmm7 228f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm3 229f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm3 230f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm7 231f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub ecx, 8 232f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 233f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 234f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 235f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 236f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang jg convertloop 237f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 238f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esp, [esp + 16] 239f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebp 240f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop edi 241f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop esi 242f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebx 243f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ret 244f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang } 245f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 246f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 247f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 248f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 249f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 250f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // extern "C" 251f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // namespace libyuv 252f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 253