/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/****************************************************************************
*
*   Module Title :     scaleopt.cpp
*
*   Description  :     Optimized scaling functions
*
****************************************************************************/
#include "pragmas.h"



/****************************************************************************
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber* Module Statics 2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber****************************************************************************/ 2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short one_fifth[] = { 51, 51, 51, 51 }; 2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short two_fifths[] = { 102, 102, 102, 102 }; 2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short three_fifths[] = { 154, 154, 154, 154 }; 2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short four_fifths[] = { 205, 205, 205, 205 }; 3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; 3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short four_ones[] = { 1, 1, 1, 1}; 3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; 3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; 3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; 3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; 3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; 3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
#include "vpx_scale/vpxscale.h"
#include "vpx_mem/vpx_mem.h"

/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_3_5_scale_mmx
 *
 *  INPUTS        : const unsigned char *source :
 *                  unsigned int source_width    :
 *                  unsigned char *dest         :
 *                  unsigned int dest_width      :
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 3 to 5 up-scaling of a horizontal line of pixels.
 *
 *  SPECIAL NOTES : None.
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_3_5_scale_mmx 6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push ebx 7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source 7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest 7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, source_width 8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edx, [esi+ecx-3]; 8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx 8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx 8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx 8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear mm7 8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber horiz_line_3_5_loop: 9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov eax, DWORD PTR [esi] // eax = 00 01 02 03 9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ebx, eax 9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber and ebx, 0xffff00 // ebx = xx 01 02 xx 9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, eax // ecx = 00 01 02 03 9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber and eax, 0xffff0000 // eax = xx xx 02 03 9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xor ecx, eax // ecx = 00 01 xx xx 9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shr ebx, 8 // ebx = 01 02 xx xx 10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber or eax, ebx // eax = 01 02 02 03 10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl ebx, 16 // ebx = xx xx 01 02 10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx 10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber or ebx, ecx // ebx = 00 01 01 02 10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx 10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm0, ebx // mm0 = 00 01 01 02 11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, 
mm6 // 11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx 11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // 11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov [edi], ebx // writeoutput 00 xx xx xx 11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 3 11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 5 11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp esi, edx 12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD Ptr [edi-4], mm0 12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl horiz_line_3_5_loop 12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber//Exit: 13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov eax, DWORD PTR [esi] // eax = 00 01 02 03 13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ebx, eax 13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber and ebx, 0xffff00 // ebx = xx 01 02 xx 13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, eax // ecx = 00 01 02 03 13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber and eax, 0xffff0000 // eax = xx xx 02 03 
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xor ecx, eax // ecx = 00 01 xx xx 13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shr ebx, 8 // ebx = 01 02 xx xx 14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber or eax, ebx // eax = 01 02 02 03 14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl eax, 8 // eax = xx 01 02 02 14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber and eax, 0xffff0000 // eax = xx xx 02 02 14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber or eax, ebx // eax = 01 02 02 02 14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shl ebx, 16 // ebx = xx xx 01 02 14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx 15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber or ebx, ecx // ebx = 00 01 01 02 15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx 15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm0, ebx // mm0 = 00 01 01 02 15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm6 // 15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx 15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // 15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov [edi], ebx // writeoutput 00 xx xx xx 16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD Ptr [edi+1], mm0 16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop ebx 17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : horizontal_line_4_5_scale_mmx 17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : const unsigned char *source : 18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int source_width : 18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned char *dest : 18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels. 
19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : None. 19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_4_5_scale_mmx 19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void)dest_width; 20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source 20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest 21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, source_width 21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edx, [esi+ecx-8]; 21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx 21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx 21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx 21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear mm7 21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber horiz_line_4_5_loop: 22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi], mm0 // write output 00 xx xx xx 22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx 23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx 23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx 23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx 23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx 24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 // added round values 24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx 24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 10 25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 // 25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm4 // added round values 25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp esi, edx 25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm2, mm7 26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl horiz_line_4_5_loop 26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber//Exit: 
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi], mm0 // write output 00 xx xx xx 28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx 28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx 28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx 
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx 28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx 29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 // added round values 29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx 29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx 29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 // 30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm4 // added round values 30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm2, mm7 30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 

/****************************************************************************
 *
 *  ROUTINE       : vertical_band_4_5_scale_mmx
 *
 *  INPUTS        : unsigned char *dest    :
 *                  unsigned int dest_pitch :
 *                  unsigned int dest_width :
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 4 to 5 up-scaling of a 4 pixel high band of pixels.
 *
 *  SPECIAL NOTES : The routine uses the first line of the band below
 *                  the current band. The function also has a "C" only
 *                  version.
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_4_5_scale_mmx 33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edi, [esi+ecx*2] // tow lines below 34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, ecx // three lines below 34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_4_5_loop: 35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD ptr [esi] // src[0]; 35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, QWORD ptr [esi+ecx] 
// src[1]; 35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // Make a copy 35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, one_fifth 36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high to word 36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // a * 1/5 36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low to word 36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // a * 1/5 36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, four_fifths // constan 37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // copy of low b 37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // b * 4/5 37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high to word 37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // copy of high b 37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm6 // b * 4/5 37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // a * 1/5 + b * 4/5 37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm5 // a * 1/5 + b * 4/5 
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des [1] 38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx], mm0 // write des[1] 39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi+ecx*2] // mm0 = src[2] 39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[1] 39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0 --- Src[2] 39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths 39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // make a copy 39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 // b * 2/5 40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, three_fifths 40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm5 // b * 2/5 40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm0 // make copy of c 40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber punpckhbw mm2, mm7 // unpack high to word 40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // c * 3/5 41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm2 41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm6 // c * 3/5 41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm4 // b * 2/5 + c * 3/5 41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 // b * 2/5 + c * 3/5 41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values // + 128 41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values // + 128 41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm3 // des[2] 42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx*2], mm1 // write des[2] 42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [edi] // mm1=Src[3]; 42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0, mm2 --- Src[2] 42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1 --- Src[3] 42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm6 --- 3/5 43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 // c * 3/5 43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths // 
mm5 = 2/5 43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm6 // c * 3/5 43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low 43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // make a copy 44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high 44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm5 // d * 2/5 44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm3 // make a copy 44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, mm5 // d * 2/5 44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // c * 3/5 + d * 2/5 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm6 // c * 3/5 + d * 2/5 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des[3] 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi], mm0 // write des[3] 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[3] 
46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 -- cleared for unpacking 46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, four_fifths // mm5 = 4/5 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 // d * 4/5 46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, one_fifth // mm6 = 1/5 46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // make a copy 46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm5 // d * 4/5 47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low 47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 // an * 1/5 47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high 47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm0 // d * 4/5 + an * 1/5 47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm6 // an * 1/5 47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm2 // d * 4/5 + an * 1/5 48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values // + 128 48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values // + 128 48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
psrlw mm3, 8 48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm3 // des[4] 48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi+ecx], mm1 // write des[4] 48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 8 49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg vs_4_5_loop 49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : last_vertical_band_4_5_scale_mmx 50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : unsigned char *dest : 50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_pitch : 50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : None 50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image. 
51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 51290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : The routine uses the first line of the band below 51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * the current band. The function also has an "C" only 51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * version. 51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid last_vertical_band_4_5_scale_mmx 51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 52490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 52590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 52690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 52790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 52890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 52990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edi, [esi+ecx*2] // tow lines below 53190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, ecx // three lines below 53290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 53490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 53590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber 53690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber last_vs_4_5_loop: 53790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD ptr [esi] // src[0]; 53990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, QWORD ptr [esi+ecx] // src[1]; 54090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // Make a copy 54290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 54390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, one_fifth 54590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high to word 54690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // a * 1/5 54890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 55090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low to word 55190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // a * 1/5 55390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, four_fifths // constan 55490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // copy of low b 55690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // b * 4/5 55790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high to word 55990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // copy of high b 56090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber pmullw mm5, mm6 // b * 4/5 56290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // a * 1/5 + b * 4/5 56390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm5 // a * 1/5 + b * 4/5 56590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 56690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 56890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 56990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 57190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des [1] 57290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx], mm0 // write des[1] 57490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi+ecx*2] // mm0 = src[2] 57590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[1] 57790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0 --- Src[2] 57890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 57990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths 58190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // make a copy 58290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 // b * 2/5 58490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, three_fifths 58590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 
58890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm5 // b * 2/5 58990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm0 // make copy of c 59190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high to word 59290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // c * 3/5 59490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm2 59590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm6 // c * 3/5 59790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm4 // b * 2/5 + c * 3/5 59890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm5 // b * 2/5 + c * 3/5 60090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values // + 128 60190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values // + 128 60390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 60490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 60690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm3 // des[2] 60790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx*2], mm1 // write des[2] 60990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [edi] // mm1=Src[3]; 61090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi+ecx], mm1 // write des[4]; 61290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0, mm2 --- Src[2] 
61490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1 --- Src[3] 61590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm6 --- 3/5 61690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 61790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 // c * 3/5 61990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths // mm5 = 2/5 62090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 62290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm6 // c * 3/5 62390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low 62590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // make a copy 62690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high 62890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm5 // d * 2/5 62990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm3 // make a copy 63190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, mm5 // d * 2/5 63290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // c * 3/5 + d * 2/5 63490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm6 // c * 3/5 + d * 2/5 63590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 63790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 63890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 
64090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 64190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 64290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des[3] 64390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi], mm0 // write des[3] 64490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 64590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[3] 64690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 -- cleared for unpacking 64790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 8 64890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 64990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 65090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 65190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg last_vs_4_5_loop 65290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 65390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 65490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 65590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 65690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 65790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : vertical_band_3_5_scale_mmx 65890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 65990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : unsigned char *dest : 66090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_pitch : 66190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 66290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 66390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 
66490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 66590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 66690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 66790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. 66890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 66990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : The routine uses the first line of the band below 67090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * the current band. The function also has an "C" only 67190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * version. 67290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 67390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 67490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 67590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_3_5_scale_mmx 67690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 67790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 67890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 67990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 68090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 68190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 68290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 68390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 68490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 68590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 68690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 68790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edi, [esi+ecx*2] // tow lines below 68890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, ecx // 
three lines below 68990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 69190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 69290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_3_5_loop: 69490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD ptr [esi] // src[0]; 69690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, QWORD ptr [esi+ecx] // src[1]; 69790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // Make a copy 69990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 70090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 70190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths // mm5 = 2/5 70290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high to word 70390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 70490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // a * 2/5 70590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 70690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 70790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low to word 70890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 70990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // a * 2/5 71090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, three_fifths // mm6 = 3/5 71190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // copy of low b 71390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // b * 3/5 
71490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high to word 71690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // copy of high b 71790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm6 // b * 3/5 71990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // a * 2/5 + b * 3/5 72090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 72190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm5 // a * 2/5 + b * 3/5 72290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 72390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 72490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 72590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 72690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 72790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 72890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des [1] 72990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 73090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx], mm0 // write des[1] 73190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi+ecx*2] // mm0 = src[2] 73290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 73390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[1] 73490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0 --- Src[2] 73590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 73690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 73790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // b low 73890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, four_fifths // b * 4/5 low 73990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
74090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // b high 74190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, four_fifths // b * 4/5 high 74290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // c 74490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, one_fifth // b * 1/5 74590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // c low 74790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, one_fifth // b * 1/5 74890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm0 // make copy of c low 75090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // c high 75190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 75290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, one_fifth // c * 1/5 low 75390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm2 // make copy of c high 75490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 75590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm7, one_fifth // c * 1/5 high 75690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm6 // b * 4/5 + c * 1/5 low 75790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 75890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm7 // b * 4/5 + c * 1/5 high 75990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm0 // make copy of c low 76090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, four_fifths // c * 4/5 low 76290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm2 // make copy of c high 76390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm7, four_fifths // 
c * 4/5 high 76590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, mm6 // b * 1/5 + c * 4/5 low 76790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, mm7 // b * 1/5 + c * 4/5 high 76890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values // + 128 77090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values // + 128 77190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 77290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 77390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 77490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 77590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm3 // des[2] 77690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx*2], mm1 // write des[2] 77790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 77890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, round_values // + 128 77990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, round_values // + 128 78090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 78190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm4, 8 78290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm5, 8 78390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 78490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm4, mm5 // des[3] 78590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi], mm4 // write des[3] 78690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 78790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0, mm2 --- Src[3] 78890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 78990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear mm7 for unpacking 79090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [edi+ecx*2] // mm1 
= Src[0] of the next group 79190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 79290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, three_fifths // mm5 = 3/5 79390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // d * 3/5 79490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 79590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, two_fifths // mm6 = 2/5 79690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 79790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 79890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // d * 3/5 79990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low 80090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 80190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm6 // an * 2/5 80290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high 80390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 80490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 // d * 3/5 + an * 2/5 80590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm6 // an * 2/5 80690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 80790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 // d * 3/5 + an * 2/5 80890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 80990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 81190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 81290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 81490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des[4] 81590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr 
[edi+ecx], mm0 // write des[4] 81790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 8 81990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 82090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 82190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 82290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg vs_3_5_loop 82390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 82490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 82590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 82690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 82790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 82890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : last_vertical_band_3_5_scale_mmx 82990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 83090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : unsigned char *dest : 83190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_pitch : 83290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 83390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 83490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 83590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 83690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 83790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 83890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. 83990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 84090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : The routine uses the first line of the band below 84190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * the current band. 
The function also has an "C" only 84290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * version. 84390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 84490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 84590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 84690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid last_vertical_band_3_5_scale_mmx 84790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 84890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 84990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 85090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 85190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 85290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 85390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 85490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 85590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 85690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 85790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 85890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edi, [esi+ecx*2] // tow lines below 85990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, ecx // three lines below 86090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 86190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 86290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 86390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 86490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 86590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber last_vs_3_5_loop: 86690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 86790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, 
QWORD ptr [esi] // src[0]; 86890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, QWORD ptr [esi+ecx] // src[1]; 86990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 87090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // Make a copy 87190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // unpack low to word 87290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 87390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, two_fifths // mm5 = 2/5 87490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // unpack high to word 87590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 87690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 // a * 2/5 87790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 87890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 // make a copy 87990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // unpack low to word 88090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 88190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 // a * 2/5 88290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, three_fifths // mm6 = 3/5 88390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 88490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // copy of low b 88590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm6 // b * 3/5 88690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 88790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 // unpack high to word 88890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // copy of high b 88990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 89090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, mm6 // b * 3/5 89190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm4 // a * 2/5 + b * 3/5 89290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
89390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm5 // a * 2/5 + b * 3/5 89490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values // + 128 89590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 89690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, round_values // + 128 89790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 89890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 89990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm2, 8 90090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // des [1] 90190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 90290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx], mm0 // write des[1] 90390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi+ecx*2] // mm0 = src[2] 90490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 90590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 90690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 90790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm1, mm3 --- Src[1] 90890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0 --- Src[2] 90990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm7 for unpacking 91090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 91190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm1 // b low 91290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, four_fifths // b * 4/5 low 91390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 91490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi+ecx], mm0 // write des[4] 91590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 91690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm3 // b high 91790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, four_fifths // b * 4/5 high 91890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
91990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // c 92090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, one_fifth // b * 1/5 92190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 92290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // c low 92390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, one_fifth // b * 1/5 92490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 92590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm0 // make copy of c low 92690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // c high 92790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 92890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, one_fifth // c * 1/5 low 92990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm2 // make copy of c high 93090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 93190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm7, one_fifth // c * 1/5 high 93290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm6 // b * 4/5 + c * 1/5 low 93390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 93490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm7 // b * 4/5 + c * 1/5 high 93590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, mm0 // make copy of c low 93690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 93790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, four_fifths // c * 4/5 low 93890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm7, mm2 // make copy of c high 93990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 94090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm7, four_fifths // c * 4/5 high 94190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 94290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, mm6 // b * 1/5 + c * 4/5 low 94390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, 
mm7 // b * 1/5 + c * 4/5 high 94490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 94590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values // + 128 94690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values // + 128 94790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 94890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 94990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 95090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 95190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm3 // des[2] 95290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [esi+ecx*2], mm1 // write des[2] 95390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 95490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm4, round_values // + 128 95590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, round_values // + 128 95690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 95790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm4, 8 95890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm5, 8 95990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 96090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm4, mm5 // des[3] 96190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq QWORD ptr [edi], mm4 // write des[3] 96290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 96390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber // mm0, mm2 --- Src[3] 96490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 96590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 8 96690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 96790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 96890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 96990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg last_vs_3_5_loop 97090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 
97190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 97290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 97390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 97490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 97590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : vertical_band_1_2_scale_mmx 97690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 97790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : unsigned char *dest : 97890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_pitch : 97990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 98090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 98190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 98290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 98390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 98490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 98590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 1 to 2 up-scaling of a band of pixels. 98690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 98790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : The routine uses the first line of the band below 98890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * the current band. The function also has an "C" only 98990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * version. 
99090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 99190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 99290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 99390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_1_2_scale_mmx 99490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 99590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 99690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 99790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 99890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 99990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 100090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 100190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 100290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 100390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 100490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 100590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 100690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 100790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 100890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 100990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_1_2_loop: 101090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 101190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi] // get Src[0] 101290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [esi + ecx * 2] // get Src[1] 101390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 101490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 // make copy before unpack 101590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 
// make copy before unpack 101690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 101790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 // low Src[0] 101890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, four_ones // mm6= 1, 1, 1, 1 101990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 102090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 // low Src[1] 102190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 // low (a + b) 102290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 102390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 // high Src[0] 102490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm6 // low (a + b + 1) 102590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 102690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 102790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 // high (a + b ) 102890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 102990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm0, 1 // low (a + b +1 )/2 103090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm6 // high (a + b + 1) 103190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 103290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm2, 1 // high (a + b + 1)/2 103390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 // pack results 103490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 103590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [esi+ecx], mm0 // write out eight bytes 103690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 103790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 103890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 103990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg vs_1_2_loop 104090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 
104190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 104290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 104390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 104490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 104590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 104690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : last_vertical_band_1_2_scale_mmx 104790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 104890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : unsigned char *dest : 104990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_pitch : 105090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 105190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 105290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 105390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 105490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 105590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 105690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 1 to 2 up-scaling of band of pixels. 105790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 105890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : The routine uses the first line of the band below 105990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * the current band. The function also has an "C" only 106090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * version. 
106190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 106290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 106390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 106490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid last_vertical_band_1_2_scale_mmx 106590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 106690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 106790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_pitch, 106890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 106990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 107090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 107190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 107290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 107390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, dest // Get the source and destination pointer 107490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_pitch // Get the pitch size 107590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 107690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width // Loop counter 107790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 107890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber last_vs_1_2_loop: 107990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 108090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi] // get Src[0] 108190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [esi+ecx], mm0 // write out eight bytes 108290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 108390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 108490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub edx, 8 108590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 108690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg last_vs_1_2_loop 
108790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 108890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 108990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 109090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 109190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 109290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : horizontal_line_1_2_scale 109390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 109490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : const unsigned char *source : 109590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int source_width : 109690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned char *dest : 109790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 109890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 109990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 110090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 110190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 110290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 110390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. 110490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 110590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : None. 
110690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 110790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 110890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 110990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_1_2_scale_mmx 111090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 111190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 111290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 111390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 111490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 111590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 111690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 111790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 111890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 111990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 112090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 112190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source 112290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest 112390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 112490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 112590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, four_ones 112690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 112790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, source_width 112890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 112990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hs_1_2_loop: 113090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 113190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi] 113290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, [esi+1] 
113390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 113490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 113590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 113690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 113790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm0 113890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 113990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 114090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 114190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 114290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 114390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm6 114490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 114590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 114690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 114790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 114890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 114990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm6 115090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm0, 1 115190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 115290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm2, 1 115390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 115490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 115590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm4 115690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm2, mm0 115790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 115890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [edi], mm2 115990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm4, mm0 116090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 116190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [edi+8], mm4 
116290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 8 116390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 116490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 16 116590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub ecx, 8 116690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 116790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp ecx, 8 116890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg hs_1_2_loop 116990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 117090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber// last eight pixel 117190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 117290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi] 117390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, mm0 117490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 117590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm0 117690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 117790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 117890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlq mm1, 8 117990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlq mm3, 56 118090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 118190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllq mm3, 56 118290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber por mm1, mm3 118390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 118490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm1 118590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, mm0 118690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 118790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 118890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 118990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 119090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 
119190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm6 119290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 119390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm2, mm7 119490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm3, mm7 119590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 119690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm3 119790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm2, mm6 119890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 119990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm0, 1 120090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psraw mm2, 1 120190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 120290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm2 120390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm2, mm4 120490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 120590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm2, mm0 120690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [edi], mm2 120790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 120890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpckhbw mm4, mm0 120990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq [edi+8], mm4 121090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 121190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 121290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 121390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 121490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 121590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 121690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 121790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 }; 121890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 
128, 64 }; 121990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 122090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 122190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 122290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 122390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : horizontal_line_5_4_scale_mmx 122490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 122590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : const unsigned char *source : Pointer to source data. 122690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int source_width : Stride of source. 122790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned char *dest : Pointer to destination data. 122890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : Stride of destination (NOT USED). 122990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 123090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 123190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 123290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 123390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 123490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : Copies horizontal line of pixels from source to 123590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * destination scaling up by 4 to 5. 123690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 123790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : None. 
123890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 123990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 124090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 124190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_5_4_scale_mmx 124290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 124390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 124490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 124590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 124690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 124790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 124890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 124990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber /* 125090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned i; 125190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int a, b, c, d, e; 125290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *des = dest; 125390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *src = source; 125490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 125590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 125690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 125790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber for ( i=0; i<source_width; i+=5 ) 125890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 125990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber a = src[0]; 126090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber b = src[1]; 126190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber c = src[2]; 126290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber d = src[3]; 126390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber e = src[4]; 
126490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 126590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber des[0] = a; 126690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber des[1] = ((b*192 + c* 64 + 128)>>8); 126790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber des[2] = ((c*128 + d*128 + 128)>>8); 126890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber des[3] = ((d* 64 + e*192 + 128)>>8); 126990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 127090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber src += 5; 127190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber des += 4; 127290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 127390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */ 127490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 127590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 127690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 127790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 127890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 127990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source ; 128090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest ; 128190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 128290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, source_width ; 128390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, const54_1 ; 128490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 128590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 ; 128690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, const54_2 ; 128790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 128890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, round_values ; 128990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edx, [esi+ecx] ; 129090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber horizontal_line_5_4_loop: 129190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
129290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD PTR [esi] ; 129390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 04 05 06 07 129490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, mm0 ; 129590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 04 05 06 07 129690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 129790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlq mm0, 8 ; 129890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 01 02 03 04 05 06 07 xx 129990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 ; 130090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx 00 xx 01 xx 02 xx 03 130190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 130290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 ; 130390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx 01 xx 02 xx 03 xx 04 130490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 130590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 130690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 130790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 5 130890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 130990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 4 131090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm0 131190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 131290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm4 131390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 131490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 131590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp esi, edx 131690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm7 131790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 131890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi-4], mm1 131990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber 132090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl horizontal_line_5_4_loop 132190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 132290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 132390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 132490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 132590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; 132690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, 128, 128 }; 132790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; 132890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 132990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 133090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 133190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 133290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 133390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 133490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 133590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push ebx 133690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 133790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source // Get the source and destination pointer 133890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, src_pitch // Get the pitch size 133990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 134090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest // tow lines below 134190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 134290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
134390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_pitch // Loop counter 134490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ebx, dest_width 134590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 134690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_5_4_loop: 134790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 134890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm0, DWORD ptr [esi] // src[0]; 134990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm1, DWORD ptr [esi+ecx] // src[1]; 135090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 135190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm2, DWORD ptr [esi+ecx*2] 135290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea eax, [esi+ecx*2] // 135390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 135490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 135590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm2, mm7 135690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 135790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm3, mm2 135890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, three_fourths 135990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 136090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, one_fourths 136190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm4, [eax+ecx] 136290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 136390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, two_fourths 136490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm4, mm7 136590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 136690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, mm4 136790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, two_fourths 136890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 136990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm2 
137090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm6, [eax+ecx*2] 137190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 137290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm5, one_fourths 137390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values; 137490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 137590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm4 137690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 137790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 137890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm6, mm7 137990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values 138090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 138190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm6, three_fourths 138290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 138390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 138490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm7 138590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm3, mm7 138690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 138790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi], mm0 138890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+edx], mm1 138990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 139090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 139190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, mm6 139290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+edx*2], mm3 139390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 139490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea eax, [edi+edx*2] 139590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm5, round_values 139690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 139790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm5, 
8 139890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 4 139990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 140090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm5, mm7 140190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [eax+edx], mm5 140290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 140390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 4 140490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub ebx, 4 140590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 140690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg vs_5_4_loop 140790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 140890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop ebx 140990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 141090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 141190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 141290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 141390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171, 0 }; 141490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, 0 }; 141590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 141690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 141790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 141890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_5_3_scale_mmx 141990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 142090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 142190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 142290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 142390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 142490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 
142590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 142690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 142790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 142890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 142990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 143090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 143190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source ; 143290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest ; 143390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 143490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, source_width ; 143590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, const53_1 ; 143690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 143790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 ; 143890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, const53_2 ; 143990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 144090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, round_values ; 144190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea edx, [esi+ecx-5] ; 144290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber horizontal_line_5_3_loop: 144390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 144490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD PTR [esi] ; 144590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 04 05 06 07 144690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, mm0 ; 144790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 04 05 06 07 144890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 144990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllw mm0, 8 ; 145090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx 00 xx 02 xx 04 xx 06 145190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 ; 145290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 01 xx 03 
xx 05 xx 07 xx 145390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 145490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 ; 145590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 xx 02 xx 04 xx 06 xx 145690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllq mm1, 16 ; 145790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx xx 01 xx 03 xx 05 xx 145890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 145990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 146090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 146190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 146290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 5 146390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 146490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 3 146590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm0 146690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 146790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm4 146890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 146990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 147090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp esi, edx 147190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm7 147290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 147390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi-3], mm1 147490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl horizontal_line_5_3_loop 147590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 147690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber//exit condition 147790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, QWORD PTR [esi] ; 147890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 04 05 06 07 147990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm1, mm0 ; 148090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 01 02 03 
04 05 06 07 148190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 148290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllw mm0, 8 ; 148390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx 00 xx 02 xx 04 xx 06 148490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 ; 148590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 01 xx 03 xx 05 xx 07 xx 148690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 148790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 ; 148890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 00 xx 02 xx 04 xx 06 xx 148990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllq mm1, 16 ; 149090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xx xx 01 xx 03 xx 05 xx 149190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 149290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm6 149390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 149490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 149590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm0 149690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 149790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm4 149890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 149990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 150090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm7 150190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd eax, mm1 150290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 150390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, eax 150490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber shr edx, 16 150590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 150690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov WORD PTR[edi], ax 150790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov BYTE PTR[edi+2], dl 150890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
150990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 151090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 151190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 151290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 151390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85, 85 }; 151490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; 151590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 151690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 151790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 151890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 151990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 152090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 152190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 152290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push ebx 152390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 152490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source // Get the source and destination pointer 152590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, src_pitch // Get the pitch size 152690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 152790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest // tow lines below 152890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 // clear out mm7 152990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 153090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_pitch // Loop counter 153190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, one_thirds 153290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
153390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, two_thirds 153490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ebx, dest_width; 153590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 153690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_5_3_loop: 153790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 153890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm0, DWORD ptr [esi] // src[0]; 153990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm1, DWORD ptr [esi+ecx] // src[1]; 154090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 154190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm2, DWORD ptr [esi+ecx*2] 154290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea eax, [esi+ecx*2] // 154390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 154490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 154590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm2, mm7 154690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 154790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm5 154890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm6 154990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 155090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm3, DWORD ptr [eax+ecx] 155190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm4, DWORD ptr [eax+ecx*2] 155290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 155390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm3, mm7 155490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm4, mm7 155590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 155690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm3, mm6 155790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm4, mm5 155890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 155990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
156090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi], mm0 156190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm2 156290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 156390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, round_values 156490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm1, 8 156590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 156690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm1, mm7 156790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, mm4 156890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 156990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm3, round_values 157090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+edx], mm1 157190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 157290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm3, 8 157390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm3, mm7 157490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 157590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi+edx*2], mm3 157690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 157790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 157890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 4 157990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 4 158090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 158190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub ebx, 4 158290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jg vs_5_3_loop 158390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 158490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop ebx 158590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 158690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 158790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 158890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
158990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 159090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 159190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 159290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 159390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * ROUTINE : horizontal_line_2_1_scale 159490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 159590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * INPUTS : const unsigned char *source : 159690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int source_width : 159790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned char *dest : 159890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * unsigned int dest_width : 159990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 160090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * OUTPUTS : None. 160190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 160290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * RETURNS : void 160390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 160490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. 160590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 160690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * SPECIAL NOTES : None. 
160790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 160890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ****************************************************************************/ 160990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 161090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid horizontal_line_2_1_scale_mmx 161190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber( 161290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber const unsigned char *source, 161390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int source_width, 161490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned char *dest, 161590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber unsigned int dest_width 161690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber) 161790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 161890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_width; 161990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) source_width; 162090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 162190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 162290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source 162390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest 162490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 162590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 162690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov ecx, dest_width 162790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 162890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber xor edx, edx 162990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hs_2_1_loop: 163090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 163190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm0, [esi+edx*2] 163290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psllw mm0, 8 163390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
163490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 163590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 163690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 163790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD Ptr [edi+edx], mm0; 163890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edx, 4 163990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 164090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp edx, ecx 164190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl hs_2_1_loop 164290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 164390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 164490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 164590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 164690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 164790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 164890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 164990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 165090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 165190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_pitch; 165290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) src_pitch; 165390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpx_memcpy(dest, source, dest_width); 165490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 165590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 165690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 165790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 48, 48, 48 }; 165890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; 
165990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 166090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic 166190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 166290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 166390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 166490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber (void) dest_pitch; 166590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber __asm 166690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber { 166790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov esi, source 166890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edi, dest 166990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 167090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov eax, src_pitch 167190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov edx, dest_width 167290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 167390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pxor mm7, mm7 167490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub esi, eax //back one line 167590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 167690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 167790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lea ecx, [esi+edx]; 167890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm6, round_values; 167990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 168090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm5, three_sixteenths; 168190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movq mm4, ten_sixteenths; 168290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 168390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vs_2_1_i_loop: 168490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm0, [esi] // 168590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
movd mm1, [esi+eax] // 168690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 168790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd mm2, [esi+eax*2] // 168890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm0, mm7 168990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 169090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm0, mm5 169190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm1, mm7 169290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 169390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm1, mm4 169490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber punpcklbw mm2, mm7 169590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 169690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pmullw mm2, mm5 169790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, round_values 169890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 169990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm1, mm2 170090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber paddw mm0, mm1 170190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 170290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber psrlw mm0, 8 170390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber packuswb mm0, mm7 170490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 170590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber movd DWORD PTR [edi], mm0 170690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add esi, 4 170790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 170890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add edi, 4; 170990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp esi, ecx 171090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber jl vs_2_1_i_loop 171190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 171290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 171390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 171490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber 171590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 171690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 171790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid 171890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberregister_mmxscalers(void) 171990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber{ 172090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; 172190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; 172290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; 172390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; 172490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; 172590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; 172690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; 172790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; 172890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; 172990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 173090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; 173190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; 173290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; 173390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; 
173490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; 173590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; 173690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 173790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 173890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 173990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; 174090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; 174190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; 174290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; 174390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; 174490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; 174590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; 174690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 174790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 174890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 174990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 175090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 1751