13eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang/*++ 23eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 33e99020dbf0a159e34b84e7ae9125f2e368d5390lgaoCopyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR> 44ea9375a2d02a43671437e0d3d808d85afb30afahhtianThis program and the accompanying materials 53eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangare licensed and made available under the terms and conditions of the BSD License 63eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangwhich accompanies this distribution. The full text of the license may be found at 73eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwanghttp://opensource.org/licenses/bsd-license.php 83eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 93eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangTHE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, 103eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangWITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 113eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 123eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangModule Name: 133eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 143eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang EfiCopyMemSSE2.c 153eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 163eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangAbstract: 173eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 183eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang This is the code that supports IA32-optimized CopyMem service 193eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 203eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang--*/ 213eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 223eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang#include "Tiano.h" 233eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 243eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangVOID 253eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangEfiCommonLibCopyMem ( 263eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang IN VOID *Destination, 273eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang IN VOID *Source, 283eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang IN UINTN Count 293eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ) 303eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang/*++ 313eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 323eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangRoutine Description: 333eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 343eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang Copy Length bytes from Source to Destination. 353eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 363eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangArguments: 373eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 383eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang Destination - Target of copy 393eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 403eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang Source - Place to copy from 413eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 423eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang Length - Number of bytes to copy 433eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 443eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangReturns: 453eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 463eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang None 473eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 483eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang--*/ 493eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang{ 503eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang __asm { 513eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov ecx, Count 523eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov esi, Source 533eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov edi, Destination 543eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 553eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; First off, make sure we have no overlap. That is to say, 563eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; if (Source == Destination) => do nothing 573eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; if (Source + Count <= Destination) => regular copy 583eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; if (Destination + Count <= Source) => regular copy 593eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; otherwise, do a reverse copy 603eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov eax, esi 613eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add eax, ecx ; Source + Count 623eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp eax, edi 633eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jle _StartByteCopy 643eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 653eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov eax, edi 663eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add eax, ecx ; Dest + Count 673eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp eax, esi 683eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jle _StartByteCopy 693eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 703eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp esi, edi 713eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je _CopyMemDone 723eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jl _CopyOverlapped ; too bad -- overlaps 733eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 743eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; Pick up misaligned start bytes to get destination pointer 4-byte aligned 753eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_StartByteCopy: 763eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp ecx, 0 773eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je _CopyMemDone ; Count == 0, all done 783eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov edx, edi 793eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang and dl, 3 ; check lower 2 bits of address 803eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang test dl, dl 813eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je SHORT _CopyBlocks ; already aligned? 823eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 833eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; Copy a byte 843eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov al, BYTE PTR [esi] ; get byte from Source 853eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov BYTE PTR [edi], al ; write byte to Destination 863eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec ecx 873eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang inc edi 883eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang inc esi 893eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jmp _StartByteCopy ; back to top of loop 903eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 913eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyBlocks: 923eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; Compute how many 64-byte blocks we can clear 933eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov eax, ecx ; get Count in eax 943eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang shr eax, 6 ; convert to 64-byte count 953eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang shl eax, 6 ; convert back to bytes 963eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang sub ecx, eax ; subtract from the original count 973eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang shr eax, 6 ; and this is how many 64-byte blocks 983eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 993eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; If no 64-byte blocks, then skip 1003eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp eax, 0 1013eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je _CopyRemainingDWords 1023eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1033eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1043eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangcopyxmm: 1053eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1063eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang movdqu xmm0, OWORD PTR ds:[esi] 1073e99020dbf0a159e34b84e7ae9125f2e368d5390lgao movdqu OWORD PTR ds:[edi], xmm0 1083eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang movdqu xmm1, OWORD PTR ds:[esi+16] 1093e99020dbf0a159e34b84e7ae9125f2e368d5390lgao movdqu OWORD PTR ds:[edi+16], xmm1 1103eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang movdqu xmm2, OWORD PTR ds:[esi+32] 1113e99020dbf0a159e34b84e7ae9125f2e368d5390lgao movdqu OWORD PTR ds:[edi+32], xmm2 1123eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang movdqu xmm3, OWORD PTR ds:[esi+48] 1133e99020dbf0a159e34b84e7ae9125f2e368d5390lgao movdqu OWORD PTR ds:[edi+48], xmm3 1143eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1153eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add edi, 64 1163eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add esi, 64 1173eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec eax 1183eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jnz copyxmm 1193eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1203eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1213eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; Copy as many DWORDS as possible 1223eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyRemainingDWords: 1233eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp ecx, 4 1243eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jb _CopyRemainingBytes 1253eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1263eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov eax, DWORD PTR [esi] ; get data from Source 1273eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov DWORD PTR [edi], eax ; write byte to Destination 1283eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang sub ecx, 4 ; decrement Count 1293eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add esi, 4 ; advance Source pointer 1303eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add edi, 4 ; advance Destination pointer 1313eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jmp _CopyRemainingDWords ; back to top 1323eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1333eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyRemainingBytes: 1343eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp ecx, 0 1353eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je _CopyMemDone 1363eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov al, BYTE PTR [esi] ; get byte from Source 1373eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov BYTE PTR [edi], al ; write byte to Destination 1383eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec ecx 1393eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang inc esi 1403eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang inc edi ; advance Destination pointer 1413eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jmp SHORT _CopyRemainingBytes ; back to top of loop 1423eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1433eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; 1443eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; We do this block if the source and destination buffers overlap. To 1453eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; handle it, copy starting at the end of the source buffer and work 1463eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; your way back. Since this is the atypical case, this code has not 1473eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; been optimized, and thus simply copies bytes. 1483eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; 1493eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyOverlapped: 1503eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1513eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang ; Move the source and destination pointers to the end of the range 1523eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add esi, ecx ; Source + Count 1533eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec esi 1543eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang add edi, ecx ; Dest + Count 1553eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec edi 1563eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1573eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyOverlappedLoop: 1583eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang cmp ecx, 0 1593eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang je _CopyMemDone 1603eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov al, BYTE PTR [esi] ; get byte from Source 1613eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang mov BYTE PTR [edi], al ; write byte to Destination 1623eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec ecx 1633eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec esi 1643eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang dec edi 1653eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang jmp _CopyOverlappedLoop ; back to top of loop 1663eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang 1673eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyMemDone: 1683eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang } 1693eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang} 170