audk/EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c

170 lines
4.9 KiB
C

/*++
Copyright (c) 2006, Intel Corporation
All rights reserved. This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
Module Name:
EfiCopyMemSSE2.c
Abstract:
This is the code that supports IA32-optimized CopyMem service
--*/
#include "Tiano.h"
VOID
EfiCommonLibCopyMem (
IN VOID *Destination,
IN VOID *Source,
IN UINTN Count
)
/*++
Routine Description:
Copy Length bytes from Source to Destination.
Arguments:
Destination - Target of copy
Source - Place to copy from
Length - Number of bytes to copy
Returns:
None
--*/
{
__asm {
mov ecx, Count
mov esi, Source
mov edi, Destination
; First off, make sure we have no overlap. That is to say,
; if (Source == Destination) => do nothing
; if (Source + Count <= Destination) => regular copy
; if (Destination + Count <= Source) => regular copy
; otherwise, do a reverse copy
mov eax, esi
add eax, ecx ; Source + Count
cmp eax, edi
jle _StartByteCopy
mov eax, edi
add eax, ecx ; Dest + Count
cmp eax, esi
jle _StartByteCopy
cmp esi, edi
je _CopyMemDone
jl _CopyOverlapped ; too bad -- overlaps
; Pick up misaligned start bytes to get destination pointer 4-byte aligned
_StartByteCopy:
cmp ecx, 0
je _CopyMemDone ; Count == 0, all done
mov edx, edi
and dl, 3 ; check lower 2 bits of address
test dl, dl
je SHORT _CopyBlocks ; already aligned?
; Copy a byte
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
inc edi
inc esi
jmp _StartByteCopy ; back to top of loop
_CopyBlocks:
; Compute how many 64-byte blocks we can clear
mov eax, ecx ; get Count in eax
shr eax, 6 ; convert to 64-byte count
shl eax, 6 ; convert back to bytes
sub ecx, eax ; subtract from the original count
shr eax, 6 ; and this is how many 64-byte blocks
; If no 64-byte blocks, then skip
cmp eax, 0
je _CopyRemainingDWords
copyxmm:
movdqu xmm0, OWORD PTR ds:[esi]
movdqu QWORD PTR ds:[edi], xmm0
movdqu xmm1, OWORD PTR ds:[esi+16]
movdqu QWORD PTR ds:[edi+16], xmm1
movdqu xmm2, OWORD PTR ds:[esi+32]
movdqu QWORD PTR ds:[edi+32], xmm2
movdqu xmm3, OWORD PTR ds:[esi+48]
movdqu QWORD PTR ds:[edi+48], xmm3
add edi, 64
add esi, 64
dec eax
jnz copyxmm
; Copy as many DWORDS as possible
_CopyRemainingDWords:
cmp ecx, 4
jb _CopyRemainingBytes
mov eax, DWORD PTR [esi] ; get data from Source
mov DWORD PTR [edi], eax ; write byte to Destination
sub ecx, 4 ; decrement Count
add esi, 4 ; advance Source pointer
add edi, 4 ; advance Destination pointer
jmp _CopyRemainingDWords ; back to top
_CopyRemainingBytes:
cmp ecx, 0
je _CopyMemDone
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
inc esi
inc edi ; advance Destination pointer
jmp SHORT _CopyRemainingBytes ; back to top of loop
;
; We do this block if the source and destination buffers overlap. To
; handle it, copy starting at the end of the source buffer and work
; your way back. Since this is the atypical case, this code has not
; been optimized, and thus simply copies bytes.
;
_CopyOverlapped:
; Move the source and destination pointers to the end of the range
add esi, ecx ; Source + Count
dec esi
add edi, ecx ; Dest + Count
dec edi
_CopyOverlappedLoop:
cmp ecx, 0
je _CopyMemDone
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
dec esi
dec edi
jmp _CopyOverlappedLoop ; back to top of loop
_CopyMemDone:
}
}