/*++

Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution.  The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php

THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

Module Name:

  EfiCopyMemSSE2.c

Abstract:

  This is the code that supports IA32-optimized CopyMem service

--*/
213eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
223eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang#include "Tiano.h"
233eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
243eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangVOID
253eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangEfiCommonLibCopyMem (
263eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  IN VOID   *Destination,
273eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  IN VOID   *Source,
283eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  IN UINTN  Count
293eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  )
303eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang/*++
313eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
323eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangRoutine Description:
333eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
343eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  Copy Length bytes from Source to Destination.
353eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
363eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangArguments:
373eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
383eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  Destination - Target of copy
393eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
403eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  Source      - Place to copy from
413eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
423eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  Length      - Number of bytes to copy
433eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
443eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangReturns:
453eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
463eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  None
473eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
483eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang--*/
493eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang{
503eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  __asm {
513eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   ecx, Count
523eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   esi, Source
533eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   edi, Destination
543eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
553eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; First off, make sure we have no overlap. That is to say,
563eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;   if (Source == Destination)           => do nothing
573eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;   if (Source + Count <= Destination)   => regular copy
583eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;   if (Destination + Count <= Source)   => regular copy
593eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;   otherwise, do a reverse copy
603eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   eax, esi
613eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   eax, ecx                      ; Source + Count
623eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   eax, edi
633eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jle   _StartByteCopy
643eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
653eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   eax, edi
663eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   eax, ecx                      ; Dest + Count
673eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   eax, esi
683eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jle   _StartByteCopy
693eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
703eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   esi, edi
713eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    _CopyMemDone
723eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jl    _CopyOverlapped               ; too bad -- overlaps
733eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
743eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; Pick up misaligned start bytes to get destination pointer 4-byte aligned
753eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_StartByteCopy:
763eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   ecx, 0
773eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    _CopyMemDone                ; Count == 0, all done
783eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   edx, edi
793eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  and   dl, 3                       ; check lower 2 bits of address
803eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  test  dl, dl
813eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    SHORT _CopyBlocks           ; already aligned?
823eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
833eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; Copy a byte
843eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   al, BYTE PTR [esi]          ; get byte from Source
853eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   BYTE PTR [edi], al          ; write byte to Destination
863eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    ecx
873eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  inc   edi
883eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  inc   esi
893eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jmp   _StartByteCopy               ; back to top of loop
903eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
913eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyBlocks:
923eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; Compute how many 64-byte blocks we can clear
933eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   eax, ecx                    ; get Count in eax
943eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  shr   eax, 6                      ; convert to 64-byte count
953eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  shl   eax, 6                      ; convert back to bytes
963eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  sub   ecx, eax                    ; subtract from the original count
973eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  shr   eax, 6                      ; and this is how many 64-byte blocks
983eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
993eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; If no 64-byte blocks, then skip
1003eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   eax, 0
1013eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    _CopyRemainingDWords
1023eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1033eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1043eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwangcopyxmm:
1053eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1063eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  movdqu xmm0, OWORD PTR ds:[esi]
1073e99020dbf0a159e34b84e7ae9125f2e368d5390lgao  movdqu OWORD PTR ds:[edi], xmm0
1083eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  movdqu xmm1, OWORD PTR ds:[esi+16]
1093e99020dbf0a159e34b84e7ae9125f2e368d5390lgao  movdqu OWORD PTR ds:[edi+16], xmm1
1103eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  movdqu xmm2, OWORD PTR ds:[esi+32]
1113e99020dbf0a159e34b84e7ae9125f2e368d5390lgao  movdqu OWORD PTR ds:[edi+32], xmm2
1123eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  movdqu xmm3, OWORD PTR ds:[esi+48]
1133e99020dbf0a159e34b84e7ae9125f2e368d5390lgao  movdqu OWORD PTR ds:[edi+48], xmm3
1143eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1153eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   edi, 64
1163eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   esi, 64
1173eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec   eax
1183eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jnz   copyxmm
1193eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1203eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1213eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; Copy as many DWORDS as possible
1223eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyRemainingDWords:
1233eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   ecx, 4
1243eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jb    _CopyRemainingBytes
1253eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1263eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   eax, DWORD PTR [esi]        ; get data from Source
1273eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   DWORD PTR [edi], eax        ; write byte to Destination
1283eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  sub   ecx, 4                      ; decrement Count
1293eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   esi, 4                      ; advance Source pointer
1303eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   edi, 4                      ; advance Destination pointer
1313eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jmp   _CopyRemainingDWords        ; back to top
1323eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1333eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyRemainingBytes:
1343eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   ecx, 0
1353eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    _CopyMemDone
1363eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   al, BYTE PTR [esi]          ; get byte from Source
1373eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   BYTE PTR [edi], al          ; write byte to Destination
1383eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    ecx
1393eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  inc    esi
1403eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  inc   edi                      ; advance Destination pointer
1413eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jmp   SHORT _CopyRemainingBytes   ; back to top of loop
1423eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1433eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;
1443eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; We do this block if the source and destination buffers overlap. To
1453eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; handle it, copy starting at the end of the source buffer and work
1463eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; your way back. Since this is the atypical case, this code has not
1473eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; been optimized, and thus simply copies bytes.
1483eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ;
1493eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyOverlapped:
1503eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1513eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  ; Move the source and destination pointers to the end of the range
1523eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   esi, ecx                      ; Source + Count
1533eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    esi
1543eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  add   edi, ecx                      ; Dest + Count
1553eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    edi
1563eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1573eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyOverlappedLoop:
1583eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  cmp   ecx, 0
1593eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  je    _CopyMemDone
1603eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   al, BYTE PTR [esi]          ; get byte from Source
1613eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  mov   BYTE PTR [edi], al          ; write byte to Destination
1623eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    ecx
1633eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec    esi
1643eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  dec   edi
1653eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  jmp   _CopyOverlappedLoop         ; back to top of loop
1663eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang
1673eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang_CopyMemDone:
1683eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang  }
1693eb9473ea9a949badfe06ae61d2d3fcfa53651c7qwang}
170