audk/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.nasm

;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------

    DEFAULT REL
    SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlapping buffers are handled: when Destination lies inside the Source
;  range (Source < Destination <= last Source byte) the copy runs backward,
;  otherwise it runs forward.
;
;  In:     rcx = Destination, rdx = Source, r8 = Count
;  Out:    rax = Destination
;  Saved:  rsi, rdi (push/pop), xmm0 (spilled around the 16-byte block loop)
;  Clobb:  rcx, r8, r9, flags
;  Stack:  after the two pushes, [rsp + 0x18] is the 16 bytes starting 8 bytes
;          above the return address; it is used as the xmm0 spill slot.
;          NOTE(review): presumably the caller-provided home/shadow area of the
;          x64 EFIAPI convention; movdqa needs it 16-byte aligned, i.e.
;          rsp % 16 == 8 at entry -- confirm against the calling convention.
;  Flags:  the forward paths do not execute cld first, so they rely on DF
;          being clear at entry; DF is cleared again before returning.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    mov     rax, rcx                    ; rax <- Destination as return value
    lea     r9, [rsi + r8 - 1]          ; r9 <- address of last Source byte
    cmp     rsi, rdi
    jae     .copy_forward               ; Source >= Destination: forward copy is safe
    cmp     r9, rdi                     ; does Source's tail reach Destination?
    jae     .copy_backward              ; yes: overlapped, copy from the end

    ; Forward copy, phase 1: byte-copy until rdi is 16-byte aligned (or Count
    ; is exhausted), because movntdq below requires an aligned destination.
.copy_forward:
    mov     rcx, rdi
    neg     rcx                         ; rcx <- -rdi
    and     rcx, 15                     ; rcx <- bytes needed to align rdi to 16
    jz      .copy_blocks                ; rdi already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; rcx <- min(rcx, Count)
    sub     r8, rcx                     ; r8 <- bytes left after the prefix
    rep     movsb

    ; Forward copy, phase 2: 16-byte blocks with non-temporal stores.
.copy_blocks:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- trailing bytes (< 16)
    shr     rcx, 4                      ; rcx <- # of 16-byte blocks; ZF set if none
    jz      .copy_tail
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack
.next_block:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the slow legacy `loop`
    jnz     .next_block
    mfence                              ; order the non-temporal stores
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     .copy_tail                  ; copy remaining bytes

    ; Backward copy: point rsi/rdi at the last byte of each buffer and let
    ; rep movsb walk downward (DF = 1) over all Count bytes.
.copy_backward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std

    ; Shared tail: copies r8 bytes in whichever direction DF selects.
.copy_tail:
    mov     rcx, r8
    rep     movsb
    cld                                 ; always leave DF clear for the caller
    pop     rdi
    pop     rsi
    ret
MdePkg BaseMemoryLibOptDxe: Convert X64/CopyMem.asm to NASM The BaseTools/Scripts/ConvertMasmToNasm.py script was used to convert X64/CopyMem.asm to X64/CopyMem.nasm Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> 2016-05-31 03:52:08 +02:00			`;------------------------------------------------------------------------------`
			`;`
			`; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>`
MdePkg: Replace BSD License with BSD+Patent License https://bugzilla.tianocore.org/show_bug.cgi?id=1373 Replace BSD 2-Clause License with BSD+Patent License. This change is based on the following emails: https://lists.01.org/pipermail/edk2-devel/2019-February/036260.html https://lists.01.org/pipermail/edk2-devel/2018-October/030385.html RFCs with detailed process for the license change: V3: https://lists.01.org/pipermail/edk2-devel/2019-March/038116.html V2: https://lists.01.org/pipermail/edk2-devel/2019-March/037669.html V1: https://lists.01.org/pipermail/edk2-devel/2019-March/037500.html Contributed-under: TianoCore Contribution Agreement 1.1 Signed-off-by: Michael D Kinney <michael.d.kinney@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com> 2019-04-04 01:06:00 +02:00			`; SPDX-License-Identifier: BSD-2-Clause-Patent`
MdePkg BaseMemoryLibOptDxe: Convert X64/CopyMem.asm to NASM The BaseTools/Scripts/ConvertMasmToNasm.py script was used to convert X64/CopyMem.asm to X64/CopyMem.nasm Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> 2016-05-31 03:52:08 +02:00			`;`
			`; Module Name:`
			`;`
			`; CopyMem.nasm`
			`;`
			`; Abstract:`
			`;`
			`; CopyMem function`
			`;`
			`; Notes:`
			`;`
			`;------------------------------------------------------------------------------`

			`DEFAULT REL`
			`SECTION .text`

			`;------------------------------------------------------------------------------`
			`; VOID *`
			`; EFIAPI`
			`; InternalMemCopyMem (`
			`; IN VOID *Destination,`
			`; IN VOID *Source,`
			`; IN UINTN Count`
			`; );`
			`;------------------------------------------------------------------------------`
			`global ASM_PFX(InternalMemCopyMem)`
			`ASM_PFX(InternalMemCopyMem):`
			`push rsi`
			`push rdi`
			`mov rsi, rdx ; rsi <- Source`
			`mov rdi, rcx ; rdi <- Destination`
			`lea r9, [rsi + r8 - 1] ; r9 <- Last byte of Source`
			`cmp rsi, rdi`
			`mov rax, rdi ; rax <- Destination as return value`
			`jae .0 ; Copy forward if Source > Destination`
			`cmp r9, rdi ; Overlapped?`
			`jae @CopyBackward ; Copy backward if overlapped`
			`.0:`
			`xor rcx, rcx`
			`sub rcx, rdi ; rcx <- -rdi`
			`and rcx, 15 ; rcx + rsi should be 16 bytes aligned`
			`jz .1 ; skip if rcx == 0`
			`cmp rcx, r8`
			`cmova rcx, r8`
			`sub r8, rcx`
			`rep movsb`
			`.1:`
			`mov rcx, r8`
			`and r8, 15`
			`shr rcx, 4 ; rcx <- # of DQwords to copy`
			`jz @CopyBytes`
			`movdqa [rsp + 0x18], xmm0 ; save xmm0 on stack`
			`.2:`
			`movdqu xmm0, [rsi] ; rsi may not be 16-byte aligned`
			`movntdq [rdi], xmm0 ; rdi should be 16-byte aligned`
			`add rsi, 16`
			`add rdi, 16`
			`loop .2`
			`mfence`
			`movdqa xmm0, [rsp + 0x18] ; restore xmm0`
			`jmp @CopyBytes ; copy remaining bytes`
			`@CopyBackward:`
			`mov rsi, r9 ; rsi <- Last byte of Source`
			`lea rdi, [rdi + r8 - 1] ; rdi <- Last byte of Destination`
			`std`
			`@CopyBytes:`
			`mov rcx, r8`
			`rep movsb`
			`cld`
			`pop rdi`
			`pop rsi`
			`ret`