mirror of https://github.com/acidanthera/audk.git
170 lines
4.9 KiB
C
170 lines
4.9 KiB
C
/*++

Copyright (c) 2006, Intel Corporation
All rights reserved. This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution.  The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php

THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

Module Name:

  EfiCopyMemSSE2.c

Abstract:

  This is the code that supports IA32-optimized CopyMem service

--*/
|
|
|
|
#include "Tiano.h"
|
|
|
|
VOID
|
|
EfiCommonLibCopyMem (
|
|
IN VOID *Destination,
|
|
IN VOID *Source,
|
|
IN UINTN Count
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Copy Length bytes from Source to Destination.
|
|
|
|
Arguments:
|
|
|
|
Destination - Target of copy
|
|
|
|
Source - Place to copy from
|
|
|
|
Length - Number of bytes to copy
|
|
|
|
Returns:
|
|
|
|
None
|
|
|
|
--*/
|
|
{
|
|
__asm {
|
|
mov ecx, Count
|
|
mov esi, Source
|
|
mov edi, Destination
|
|
|
|
; First off, make sure we have no overlap. That is to say,
|
|
; if (Source == Destination) => do nothing
|
|
; if (Source + Count <= Destination) => regular copy
|
|
; if (Destination + Count <= Source) => regular copy
|
|
; otherwise, do a reverse copy
|
|
mov eax, esi
|
|
add eax, ecx ; Source + Count
|
|
cmp eax, edi
|
|
jle _StartByteCopy
|
|
|
|
mov eax, edi
|
|
add eax, ecx ; Dest + Count
|
|
cmp eax, esi
|
|
jle _StartByteCopy
|
|
|
|
cmp esi, edi
|
|
je _CopyMemDone
|
|
jl _CopyOverlapped ; too bad -- overlaps
|
|
|
|
; Pick up misaligned start bytes to get destination pointer 4-byte aligned
|
|
_StartByteCopy:
|
|
cmp ecx, 0
|
|
je _CopyMemDone ; Count == 0, all done
|
|
mov edx, edi
|
|
and dl, 3 ; check lower 2 bits of address
|
|
test dl, dl
|
|
je SHORT _CopyBlocks ; already aligned?
|
|
|
|
; Copy a byte
|
|
mov al, BYTE PTR [esi] ; get byte from Source
|
|
mov BYTE PTR [edi], al ; write byte to Destination
|
|
dec ecx
|
|
inc edi
|
|
inc esi
|
|
jmp _StartByteCopy ; back to top of loop
|
|
|
|
_CopyBlocks:
|
|
; Compute how many 64-byte blocks we can clear
|
|
mov eax, ecx ; get Count in eax
|
|
shr eax, 6 ; convert to 64-byte count
|
|
shl eax, 6 ; convert back to bytes
|
|
sub ecx, eax ; subtract from the original count
|
|
shr eax, 6 ; and this is how many 64-byte blocks
|
|
|
|
; If no 64-byte blocks, then skip
|
|
cmp eax, 0
|
|
je _CopyRemainingDWords
|
|
|
|
|
|
copyxmm:
|
|
|
|
movdqu xmm0, OWORD PTR ds:[esi]
|
|
movdqu QWORD PTR ds:[edi], xmm0
|
|
movdqu xmm1, OWORD PTR ds:[esi+16]
|
|
movdqu QWORD PTR ds:[edi+16], xmm1
|
|
movdqu xmm2, OWORD PTR ds:[esi+32]
|
|
movdqu QWORD PTR ds:[edi+32], xmm2
|
|
movdqu xmm3, OWORD PTR ds:[esi+48]
|
|
movdqu QWORD PTR ds:[edi+48], xmm3
|
|
|
|
add edi, 64
|
|
add esi, 64
|
|
dec eax
|
|
jnz copyxmm
|
|
|
|
|
|
; Copy as many DWORDS as possible
|
|
_CopyRemainingDWords:
|
|
cmp ecx, 4
|
|
jb _CopyRemainingBytes
|
|
|
|
mov eax, DWORD PTR [esi] ; get data from Source
|
|
mov DWORD PTR [edi], eax ; write byte to Destination
|
|
sub ecx, 4 ; decrement Count
|
|
add esi, 4 ; advance Source pointer
|
|
add edi, 4 ; advance Destination pointer
|
|
jmp _CopyRemainingDWords ; back to top
|
|
|
|
_CopyRemainingBytes:
|
|
cmp ecx, 0
|
|
je _CopyMemDone
|
|
mov al, BYTE PTR [esi] ; get byte from Source
|
|
mov BYTE PTR [edi], al ; write byte to Destination
|
|
dec ecx
|
|
inc esi
|
|
inc edi ; advance Destination pointer
|
|
jmp SHORT _CopyRemainingBytes ; back to top of loop
|
|
|
|
;
|
|
; We do this block if the source and destination buffers overlap. To
|
|
; handle it, copy starting at the end of the source buffer and work
|
|
; your way back. Since this is the atypical case, this code has not
|
|
; been optimized, and thus simply copies bytes.
|
|
;
|
|
_CopyOverlapped:
|
|
|
|
; Move the source and destination pointers to the end of the range
|
|
add esi, ecx ; Source + Count
|
|
dec esi
|
|
add edi, ecx ; Dest + Count
|
|
dec edi
|
|
|
|
_CopyOverlappedLoop:
|
|
cmp ecx, 0
|
|
je _CopyMemDone
|
|
mov al, BYTE PTR [esi] ; get byte from Source
|
|
mov BYTE PTR [edi], al ; write byte to Destination
|
|
dec ecx
|
|
dec esi
|
|
dec edi
|
|
jmp _CopyOverlappedLoop ; back to top of loop
|
|
|
|
_CopyMemDone:
|
|
}
|
|
}
|