diff --git a/MdePkg/Library/BaseMemoryLibRepStr/X64/CopyMem.S b/MdePkg/Library/BaseMemoryLibRepStr/X64/CopyMem.S index 3e7a396b5b..a9c4763483 100644 --- a/MdePkg/Library/BaseMemoryLibRepStr/X64/CopyMem.S +++ b/MdePkg/Library/BaseMemoryLibRepStr/X64/CopyMem.S @@ -44,20 +44,20 @@ ASM_PFX(InternalMemCopyMem): lea r9, [rsi + r8 - 1] # r9 <- End of Source cmp rsi, rdi mov rax, rdi # rax <- Destination as return value - jae _InternalMemCopyMem_al_0000 + jae L0 cmp r9, rdi - jae _atSym_CopyBackward # Copy backward if overlapped -_InternalMemCopyMem_al_0000: + jae L_CopyBackward # Copy backward if overlapped +L0: mov rcx, r8 and r8, 7 shr rcx, 3 rep movsq # Copy as many Qwords as possible - jmp _atSym_CopyBytes -_atSym_CopyBackward: + jmp L_CopyBytes +L_CopyBackward: mov rsi, r9 # rsi <- End of Source lea rdi, [rdi + r8 - 1] # esi <- End of Destination std # set direction flag -_atSym_CopyBytes: +L_CopyBytes: mov rcx, r8 rep movsb # Copy bytes backward cld diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.S b/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.S index dc47510874..35db797677 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.S @@ -44,38 +44,38 @@ ASM_PFX(InternalMemCopyMem): lea r9, [rsi + r8 - 1] # r9 <- Last byte of Source cmp rsi, rdi mov rax, rdi # rax <- Destination as return value - jae _InternalMemCopyMem_al_0000 # Copy forward if Source > Destination + jae L0 # Copy forward if Source > Destination cmp r9, rdi # Overlapped? - jae _atSym_CopyBackward # Copy backward if overlapped -_InternalMemCopyMem_al_0000: + jae L_CopyBackward # Copy backward if overlapped +L0: xor rcx, rcx sub rcx, rdi # rcx <- -rdi and rcx, 15 # rcx + rsi should be 16 bytes aligned - jz _InternalMemCopyMem_al_0001 # skip if rcx == 0 + jz L1 # skip if rcx == 0 cmp rcx, r8 cmova rcx, r8 sub r8, rcx rep movsb -_InternalMemCopyMem_al_0001: +L1: mov rcx, r8 and r8, 15 shr rcx, 4 # rcx <- # of DQwords to copy - jz _atSym_CopyBytes - movdqa [rsp + 0x18], xmm0 # save xmm0 on stack -_InternalMemCopyMem_al_0002: + jz L_CopyBytes + movdqa [rsp + 0x18], xmm0 # save xmm0 on stack +L2: movdqu xmm0, [rsi] # rsi may not be 16-byte aligned movntdq [rdi], xmm0 # rdi should be 16-byte aligned add rsi, 16 add rdi, 16 - loop _InternalMemCopyMem_al_0002 + loop L2 mfence - movdqa xmm0, [rsp + 0x18] # restore xmm0 - jmp _atSym_CopyBytes # copy remaining bytes -_atSym_CopyBackward: + movdqa xmm0, [rsp + 0x18] # restore xmm0 + jmp L_CopyBytes # copy remaining bytes +L_CopyBackward: mov rsi, r9 # rsi <- Last byte of Source lea rdi, [rdi + r8 - 1] # rdi <- Last byte of Destination std -_atSym_CopyBytes: +L_CopyBytes: mov rcx, r8 rep movsb cld diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.S b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.S index 50f6d19886..5d915d4a08 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.S @@ -53,9 +53,9 @@ L0: mov rcx, rdx and rdx, 15 shr rcx, 4 - jz _SetBytes_L2 + jz L_SetBytes mov ah, al # ax <- Value repeats twice - movdqa [rsp + 0x10], xmm0 # save xmm0 + movdqa [rsp + 0x10], xmm0 # save xmm0 movd xmm0, eax # xmm0[0..16] <- Value repeats twice pshuflw xmm0, xmm0, 0 # xmm0[0..63] <- Value repeats 8 times movlhps xmm0, xmm0 # xmm0 <- Value repeats 16 times @@ -64,8 +64,8 @@ L1: add rdi, 16 loop L1 mfence - movdqa xmm0, [rsp + 0x10] # restore xmm0 -_SetBytes_L2: + movdqa xmm0, [rsp + 0x10] # restore xmm0 +L_SetBytes: mov ecx, edx # high 32 bits of rcx are always zero rep stosb mov rax, r9 # rax <- Return value diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.S b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.S index 5678fcd192..354222c1a9 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem16.S @@ -54,7 +54,7 @@ L0: mov rcx, rdx and edx, 7 shr rcx, 3 - jz _SetWords_L2 + jz L_SetWords movd xmm0, eax pshuflw xmm0, xmm0, 0 movlhps xmm0, xmm0 @@ -63,7 +63,7 @@ L1: add rdi, 16 loop L1 mfence -_SetWords_L2: +L_SetWords: mov ecx, edx rep stosw mov rax, r9 diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.S b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.S index 06ffe768eb..2ace79ad96 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem32.S @@ -54,7 +54,7 @@ L0: mov rcx, rdx and edx, 3 shr rcx, 2 - jz _SetDwords_L2 + jz L_SetDwords movd xmm0, eax pshufd xmm0, xmm0, 0 L1: @@ -62,7 +62,7 @@ L1: add rdi, 16 loop L1 mfence -_SetDwords_L2: +L_SetDwords: mov ecx, edx rep stosd mov rax, r9 diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.S b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.S index e0aa176d58..870f8c0ee9 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/SetMem64.S @@ -46,14 +46,14 @@ ASM_PFX(InternalMemSetMem64): dec rcx L0: shr rcx, 1 - jz _SetQwords_L2 + jz L_SetQwords movlhps xmm0, xmm0 L1: movntdq [rdx], xmm0 lea rdx, [rdx + 16] loop L1 mfence -_SetQwords_L2: +L_SetQwords: jnc L2 mov [rdx], r8 L2: diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.S b/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.S index 2712579db8..f152d98a39 100644 --- a/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.S +++ b/MdePkg/Library/BaseMemoryLibSse2/X64/ZeroMem.S @@ -51,14 +51,14 @@ L0: mov rcx, rdx and edx, 15 shr rcx, 4 - jz _ZeroBytes_L2 + jz L_ZeroBytes pxor xmm0, xmm0 L1: movntdq [rdi], xmm0 # rdi should be 16-byte aligned add rdi, 16 loop L1 mfence -_ZeroBytes_L2: +L_ZeroBytes: mov ecx, edx rep stosb mov rax, r8