1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12%include "vpx_ports/x86_abi_support.asm"
13
14SECTION .text
15
;-----------------------------------------------------------------------
; void vp8_copy_mem8x8_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
;     )
;
; Copies 8 rows of 8 bytes each from src to dst. Consecutive source rows
; are src_stride bytes apart, consecutive destination rows are dst_stride
; bytes apart. Every row is moved with one 64-bit MMX load and one 64-bit
; MMX store.
;
; Register roles inside the body:
;   rsi = current source row pointer       rax = src_stride (sign-extended)
;   rdi = current destination row pointer  rcx = dst_stride (sign-extended)
;
; Overwrites rax, rcx, mm0-mm5 and the flags. rsi and rdi are pushed in
; the prolog and popped in the epilog. No emms is executed in this
; routine.
;
; Arguments are read through the arg() macro after SHADOW_ARGS_TO_STACK;
; both come from the included ABI support file.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; ---- fetch all four arguments up front ----
    mov         rsi, arg(0)                 ; rsi = src
    movsxd      rax, dword ptr arg(1)       ; rax = src_stride
    mov         rdi, arg(2)                 ; rdi = dst
    movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

    ; ---- rows 0..2: read three rows, then write three rows ----
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2

    ; ---- advance both pointers by three rows (now at row 3) ----
    lea         rsi, [rsi+rax*2]
    add         rsi, rax
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

    ; ---- rows 3..5 ----
    movq        mm3, [rsi]
    movq        mm4, [rsi+rax]
    movq        mm5, [rsi+rax*2]
    movq        [rdi], mm3
    movq        [rdi+rcx], mm4
    movq        [rdi+rcx*2], mm5

    ; ---- advance both pointers by two rows (now at row 5) ----
    lea         rsi, [rsi+rax*2]
    lea         rdi, [rdi+rcx*2]

    ; ---- rows 6..7, addressed as +1 and +2 strides from row 5 ----
    movq        mm0, [rsi+rax]
    movq        mm1, [rsi+rax*2]
    movq        [rdi+rcx], mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
77
78
;-----------------------------------------------------------------------
; void vp8_copy_mem8x4_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
;     )
;
; Copies 4 rows of 8 bytes each from src to dst. Consecutive source rows
; are src_stride bytes apart, consecutive destination rows are dst_stride
; bytes apart. Every row is moved with one 64-bit MMX load and one 64-bit
; MMX store.
;
; Register roles inside the body:
;   rsi = current source row pointer       rax = src_stride (sign-extended)
;   rdi = current destination row pointer  rcx = dst_stride (sign-extended)
;
; Overwrites rax, rcx and mm0-mm3. rsi and rdi are pushed in the prolog
; and popped in the epilog. No emms is executed in this routine.
;
; Arguments are read through the arg() macro after SHADOW_ARGS_TO_STACK;
; both come from the included ABI support file.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; ---- fetch all four arguments up front ----
    mov         rsi, arg(0)                 ; rsi = src
    movsxd      rax, dword ptr arg(1)       ; rax = src_stride
    mov         rdi, arg(2)                 ; rdi = dst
    movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

    ; ---- rows 0..2: read three rows, then write three rows ----
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2

    ; ---- advance both pointers by two rows (now at row 2) ----
    lea         rsi, [rsi+rax*2]
    lea         rdi, [rdi+rcx*2]

    ; ---- row 3, addressed as +1 stride from row 2 ----
    movq        mm3, [rsi+rax]
    movq        [rdi+rcx], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
121