1f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang/* 2f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Copyright 2013 The LibYuv Project Authors. All rights reserved. 3f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * 4f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * Use of this source code is governed by a BSD-style license 5f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * that can be found in the LICENSE file in the root of the source 6f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * tree. An additional intellectual property rights grant can be found 7f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * in the file PATENTS. All contributing project authors may 8f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang * be found in the AUTHORS file in the root of the source tree. 9f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang */ 10f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 11f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#include "libyuv/rotate_row.h" 12cead1e07666bcc5914f8927712c2f89b9b789f9bFrank Barchard#include "libyuv/row.h" 13f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 14f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 15f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangnamespace libyuv { 16f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuangextern "C" { 17f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 18f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 19f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang// This module is for 32 bit Visual C x86 and clangcl 20f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 21f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 22b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard__declspec(naked) void TransposeWx8_SSSE3(const uint8* src, 23b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride, 24b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst, 25b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride, 26b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int width) { 27f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm { 28f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push edi 29f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push esi 30f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebp 31b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov eax, [esp + 12 + 4] // src 32b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov edi, [esp + 12 + 8] // src_stride 33f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov edx, [esp + 12 + 12] // dst 34f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esi, [esp + 12 + 16] // dst_stride 35f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, [esp + 12 + 20] // width 36f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 37f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Read in the data from the source pointer. 38f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // First round of bit swap. 39f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang align 4 40f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang convertloop: 41f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm0, qword ptr [eax] 42f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebp, [eax + 8] 43f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm1, qword ptr [eax + edi] 44f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 45f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm0, xmm1 46f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm2, qword ptr [eax] 47f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm1, xmm0 48f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm1, xmm1, 8 49f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm3, qword ptr [eax + edi] 50f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 51f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm2, xmm3 52f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm2 53f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm4, qword ptr [eax] 54f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm3, xmm3, 8 55f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm5, qword ptr [eax + edi] 56f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm4, xmm5 57f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 58f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm4 59f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm6, qword ptr [eax] 60f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm5, xmm5, 8 61f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq xmm7, qword ptr [eax + edi] 62f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm6, xmm7 63f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov eax, ebp 64f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm6 65f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 66f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Second round of bit swap. 67f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm0, xmm2 68f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm1, xmm3 69f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm2, xmm0 70f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm1 71f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm2, xmm2, 8 72f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm3, xmm3, 8 73f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm4, xmm6 74f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm5, xmm7 75f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm4 76f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm5 77f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm6, xmm6, 8 78f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 79f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Third round of bit swap. 80f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Write to the destination pointer. 81f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm0, xmm4 82f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm0 83f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm4, xmm0 84f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm4, xmm4, 8 85f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm4 86f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 87f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm2, xmm6 88f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm2 89f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm6, xmm6, 8 90f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm2 91f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm1, xmm5 92f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm6 93f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 94f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm1 95f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm1 96f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm5, xmm5, 8 97f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm3, xmm7 98f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm5 99f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 100f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx], xmm3 101f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm3 102f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang palignr xmm7, xmm7, 8 103f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub ecx, 8 104f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movq qword ptr [edx + esi], xmm7 105f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 106f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang jg convertloop 107f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 108f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebp 109f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop esi 110f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop edi 111f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ret 112f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang } 113f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 114f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 115b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard__declspec(naked) void TransposeUVWx8_SSE2(const uint8* src, 116b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int src_stride, 117b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_a, 118b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride_a, 119b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard uint8* dst_b, 120b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int dst_stride_b, 121b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard int w) { 122f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang __asm { 123f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebx 124f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push esi 125f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push edi 126f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang push ebp 127b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov eax, [esp + 16 + 4] // src 128b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard mov edi, [esp + 16 + 8] // src_stride 129f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov edx, [esp + 16 + 12] // dst_a 130f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esi, [esp + 16 + 16] // dst_stride_a 131f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ebx, [esp + 16 + 20] // dst_b 132f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ebp, [esp + 16 + 24] // dst_stride_b 133f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, esp 134f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub esp, 4 + 16 135f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang and esp, ~15 136f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov [esp + 16], ecx 137f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov ecx, [ecx + 16 + 28] // w 138f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 139f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang align 4 140f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang convertloop: 141b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard // Read in the data from the source pointer. 142b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard // First round of bit swap. 143f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm0, [eax] 144f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm1, [eax + edi] 145f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 146f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm0 // use xmm7 as temp register. 147f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm0, xmm1 148f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm1 149f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm1, xmm7 150f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm2, [eax] 151f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm3, [eax + edi] 152f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 153f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm2 154f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm2, xmm3 155f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm3 156f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm7 157f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm4, [eax] 158f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm5, [eax + edi] 159f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 160f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm4 161f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm4, xmm5 162f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm7, xmm5 163f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm7 164f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm6, [eax] 165f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm7, [eax + edi] 166f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 2 * edi] 167f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu [esp], xmm5 // backup xmm5 168f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang neg edi 169b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm5, xmm6 // use xmm5 as temp register. 170f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklbw xmm6, xmm7 171f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhbw xmm5, xmm7 172f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm5 173f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea eax, [eax + 8 * edi + 16] 174f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang neg edi 175f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Second round of bit swap. 176f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm0 177f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm0, xmm2 178f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm2 179f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm2, xmm5 180f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm1 181f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm1, xmm3 182f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm3 183f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm3, xmm5 184f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm5, xmm4 185f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm4, xmm6 186f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm5, xmm6 187f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm5 188f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm5, [esp] // restore xmm5 189f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu [esp], xmm6 // backup xmm6 190b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm6, xmm5 // use xmm6 as temp register. 191f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpcklwd xmm5, xmm7 192f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhwd xmm6, xmm7 193f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm7, xmm6 194f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Third round of bit swap. 195f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang // Write to the destination pointer. 196f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm6, xmm0 197f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm0, xmm4 198f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm6, xmm4 199f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqa xmm4, xmm6 200f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movdqu xmm6, [esp] // restore xmm6 201f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm0 202f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm0 203f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm4 204f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 205f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm4 206f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 207b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm2 // use xmm0 as the temp register. 208f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm2, xmm6 209f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm2 210f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm2 211f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm6 212f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 213f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 214f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 215f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 216b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm1 // use xmm0 as the temp register. 217f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm1, xmm5 218f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm1 219f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm1 220f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm5 221f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 222f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 223f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 224f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 225b83bb38f0a92bedeb52baa31e515220927ef53bbFrank Barchard movdqa xmm0, xmm3 // use xmm0 as the temp register. 226f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckldq xmm3, xmm7 227f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx], xmm3 228f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx], xmm3 229f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang punpckhdq xmm0, xmm7 230f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang sub ecx, 8 231f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movlpd qword ptr [edx + esi], xmm0 232f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea edx, [edx + 2 * esi] 233f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang movhpd qword ptr [ebx + ebp], xmm0 234f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang lea ebx, [ebx + 2 * ebp] 235f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang jg convertloop 236f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 237f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang mov esp, [esp + 16] 238f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebp 239f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop edi 240f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop esi 241f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang pop ebx 242f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang ret 243f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang } 244f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} 245f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 246f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) 247f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang 248f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#ifdef __cplusplus 249f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // extern "C" 250f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang} // namespace libyuv 251f047e7ca6983218eed7703c7afd51fed7bd3b5c9Hangyu Kuang#endif 252