; Origin: mirror of https://github.com/acidanthera/audk.git
; (viewer metadata for the mirrored original: NASM, 78 lines, 2.4 KiB)
;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------

DEFAULT REL
SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;
;  Direction is chosen so overlapping buffers copy correctly:
;    * Source >= Destination, or Source's last byte lies below Destination:
;      forward copy (byte prefix until Destination is 16-byte aligned, then
;      16-byte non-temporal stores, then a byte tail).
;    * Otherwise (Source < Destination and the ranges overlap): byte-wise
;      backward copy starting from the last byte of each buffer.
;
;  Registers as read by the code:
;    In:    rcx = Destination, rdx = Source, r8 = Count
;    Out:   rax = Destination
;    Saved: rsi, rdi (push/pop); xmm0 (spilled to [rsp + 0x18] and reloaded)
;    Used:  rcx, r8, r9, flags
;
;  Notes:
;    * The forward path issues no cld before its first rep movsb, so it relies
;      on the direction flag being clear on entry. DF is cleared on every exit.
;    * The xmm0 spill slot at [rsp + 0x18] is above the return address once
;      rsi/rdi are pushed, and movdqa requires it to be 16-byte aligned.
;      NOTE(review): this presumes the caller provides home/shadow space there
;      and the usual entry stack alignment (Microsoft x64 / EFIAPI) - confirm.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- address of last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination as return value (mov keeps flags)
    jae     .CopyForward                ; Source >= Destination: forward copy is safe
    cmp     r9, rdi                     ; does the end of Source reach into Destination?
    jae     .CopyBackward               ; yes, overlapped: copy backward

.CopyForward:
    xor     ecx, ecx
    sub     rcx, rdi                    ; rcx <- -rdi
    and     rcx, 15                     ; rcx <- bytes needed to make rdi 16-byte aligned
    jz      .DestAligned                ; rdi already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; do not copy more than Count bytes
    sub     r8, rcx                     ; r8 <- bytes left after the prefix
    rep     movsb                       ; copy the unaligned prefix

.DestAligned:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- tail bytes after the 16-byte blocks
    shr     rcx, 4                      ; rcx <- number of 16-byte blocks to copy
    jz      .CopyBytes                  ; no whole block: only the tail remains
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack

.Copy16:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; one block done (flags are not consumed after the loop)
    jnz     .Copy16
    mfence                              ; order the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     .CopyBytes                  ; copy remaining bytes

.CopyBackward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std                                 ; rep movsb now walks downward

.CopyBytes:
    mov     rcx, r8                     ; forward: tail count; backward: full Count
    rep     movsb
    cld                                 ; always leave DF clear
    pop     rdi
    pop     rsi
    ret