From 643ec0e7c1633c1970a154fa6bdada6eb1d3daa2 Mon Sep 17 00:00:00 2001 From: oliviermartin Date: Thu, 31 Mar 2011 12:26:20 +0000 Subject: [PATCH] ArmPkg: Fix InternalMemCopyMem() This function crashed when regions overlapped. The condition to optimize the copy of overlapped regions was not correct. ArmPkg: Add comments to InternalMemCopyMem() These comments explain the flow of this assembly function. git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@11486 6f19259b-4bc3-4df7-8a09-765794883524 --- ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S | 155 ++++++++++++------ .../Library/BaseMemoryLibStm/Arm/CopyMem.asm | 153 +++++++++++------ 2 files changed, 213 insertions(+), 95 deletions(-) diff --git a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S index 1298f94e8d..4e0122bdec 100755 --- a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S +++ b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.S @@ -43,70 +43,129 @@ GCC_ASM_EXPORT(InternalMemCopyMem) ASM_PFX(InternalMemCopyMem): stmfd sp!, {r4-r11, lr} - tst r0, #3 + // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length) mov r11, r0 mov r10, r0 - mov ip, r2 - mov lr, r1 + mov r12, r2 + mov r14, r1 + +memcopy_check_overlapped: + cmp r11, r1 + // If (dest < source) + bcc memcopy_check_optim_default + // If (dest <= source). 
But with the previous condition -> If (dest == source) + bls memcopy_end + + // If (source + length < dest) + rsb r3, r1, r11 + cmp r12, r3 + bcc memcopy_check_optim_default + + // If (length == 0) + cmp r12, #0 + beq memcopy_end + + b memcopy_check_optim_overlap + +memcopy_check_optim_default: + // Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1) + tst r0, #0xF movne r0, #0 - bne L4 - tst r1, #3 + bne memcopy_default + tst r1, #0xF movne r3, #0 moveq r3, #1 cmp r2, #31 movls r0, #0 andhi r0, r3, #1 -L4: - cmp r11, r1 - bcc L26 - bls L7 - rsb r3, r1, r11 - cmp ip, r3 - bcc L26 - cmp ip, #0 - beq L7 - add r10, r11, ip - add lr, ip, r1 - b L16 -L29: - sub ip, ip, #8 - cmp ip, #7 - ldrd r2, [lr, #-8]! + b memcopy_default + +memcopy_check_optim_overlap: + // r10 = dest_end, r14 = source_end + add r10, r11, r12 + add r14, r12, r1 + + // Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned) + cmp r2, #31 movls r0, #0 - cmp ip, #0 - strd r2, [r10, #-8]! - beq L7 -L16: - cmp r0, #0 - bne L29 - sub r3, lr, #1 - sub ip, ip, #1 + movhi r0, #1 + tst r10, #0xF + movne r0, #0 + tst r14, #0xF + movne r0, #0 + b memcopy_overlapped + +memcopy_overlapped_non_optim: + // We read 1 byte from the end of the source buffer + sub r3, r14, #1 + sub r12, r12, #1 ldrb r3, [r3, #0] sub r2, r10, #1 - cmp ip, #0 + cmp r12, #0 + // We write 1 byte at the end of the dest buffer sub r10, r10, #1 - sub lr, lr, #1 + sub r14, r14, #1 strb r3, [r2, #0] - bne L16 - b L7 -L11: - ldrb r3, [lr], #1 - sub ip, ip, #1 - strb r3, [r10], #1 -L26: - cmp ip, #0 - beq L7 -L30: + bne memcopy_overlapped_non_optim + b memcopy_end + +// r10 = dest_end, r14 = source_end +memcopy_overlapped: + // Are we in the optimized case ? 
cmp r0, #0 - beq L11 - sub ip, ip, #32 - cmp ip, #31 - ldmia lr!, {r2-r9} + beq memcopy_overlapped_non_optim + + // Optimized Overlapped - Read 32 bytes + sub r14, r14, #32 + sub r12, r12, #32 + cmp r12, #31 + ldmia r14, {r2-r9} + + // If length is less than 32 then disable optim movls r0, #0 - cmp ip, #0 + + cmp r12, #0 + + // Optimized Overlapped - Write 32 bytes + sub r10, r10, #32 + stmia r10, {r2-r9} + + // while (length != 0) + bne memcopy_overlapped + b memcopy_end + +memcopy_default_non_optim: + // Byte copy + ldrb r3, [r14], #1 + sub r12, r12, #1 + strb r3, [r10], #1 + +memcopy_default: + cmp r12, #0 + beq memcopy_end + +// r10 = dest, r14 = source +memcopy_default_loop: + cmp r0, #0 + beq memcopy_default_non_optim + + // Optimized memcopy - Read 32 Bytes + sub r12, r12, #32 + cmp r12, #31 + ldmia r14!, {r2-r9} + + // If length is less than 32 then disable optim + movls r0, #0 + + cmp r12, #0 + + // Optimized memcopy - Write 32 Bytes stmia r10!, {r2-r9} - bne L30 -L7: + + // while (length != 0) + bne memcopy_default_loop + +memcopy_end: mov r0, r11 ldmfd sp!, {r4-r11, pc} diff --git a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm index 1a5e18e120..ca8d06a550 100755 --- a/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm +++ b/ArmPkg/Library/BaseMemoryLibStm/Arm/CopyMem.asm @@ -43,70 +43,129 @@ InternalMemCopyMem ( InternalMemCopyMem stmfd sp!, {r4-r11, lr} - tst r0, #3 + // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length) mov r11, r0 mov r10, r0 - mov ip, r2 - mov lr, r1 + mov r12, r2 + mov r14, r1 + +memcopy_check_overlapped + cmp r11, r1 + // If (dest < source) + bcc memcopy_check_optim_default + // If (dest <= source). 
But with the previous condition -> If (dest == source) + bls memcopy_end + + // If (source + length < dest) + rsb r3, r1, r11 + cmp r12, r3 + bcc memcopy_check_optim_default + + // If (length == 0) + cmp r12, #0 + beq memcopy_end + + b memcopy_check_optim_overlap + +memcopy_check_optim_default + // Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1) + tst r0, #0xF movne r0, #0 - bne L4 - tst r1, #3 + bne memcopy_default + tst r1, #0xF movne r3, #0 moveq r3, #1 cmp r2, #31 movls r0, #0 andhi r0, r3, #1 -L4 - cmp r11, r1 - bcc L26 - bls L7 - rsb r3, r1, r11 - cmp ip, r3 - bcc L26 - cmp ip, #0 - beq L7 - add r10, r11, ip - add lr, ip, r1 - b L16 -L29 - sub ip, ip, #8 - cmp ip, #7 - ldrd r2, [lr, #-8]! + b memcopy_default + +memcopy_check_optim_overlap + // r10 = dest_end, r14 = source_end + add r10, r11, r12 + add r14, r12, r1 + + // Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned) + cmp r2, #31 movls r0, #0 - cmp ip, #0 - strd r2, [r10, #-8]! - beq L7 -L16 - cmp r0, #0 - bne L29 - sub r3, lr, #1 - sub ip, ip, #1 + movhi r0, #1 + tst r10, #0xF + movne r0, #0 + tst r14, #0xF + movne r0, #0 + b memcopy_overlapped + +memcopy_overlapped_non_optim + // We read 1 byte from the end of the source buffer + sub r3, r14, #1 + sub r12, r12, #1 ldrb r3, [r3, #0] sub r2, r10, #1 - cmp ip, #0 + cmp r12, #0 + // We write 1 byte at the end of the dest buffer sub r10, r10, #1 - sub lr, lr, #1 + sub r14, r14, #1 strb r3, [r2, #0] - bne L16 - b L7 -L11 - ldrb r3, [lr], #1 - sub ip, ip, #1 + bne memcopy_overlapped_non_optim + b memcopy_end + +// r10 = dest_end, r14 = source_end +memcopy_overlapped + // Are we in the optimized case ? 
+ cmp r0, #0 + beq memcopy_overlapped_non_optim + + // Optimized Overlapped - Read 32 bytes + sub r14, r14, #32 + sub r12, r12, #32 + cmp r12, #31 + ldmia r14, {r2-r9} + + // If length is less than 32 then disable optim + movls r0, #0 + + cmp r12, #0 + + // Optimized Overlapped - Write 32 bytes + sub r10, r10, #32 + stmia r10, {r2-r9} + + // while (length != 0) + bne memcopy_overlapped + b memcopy_end + +memcopy_default_non_optim + // Byte copy + ldrb r3, [r14], #1 + sub r12, r12, #1 strb r3, [r10], #1 -L26 - cmp ip, #0 - beq L7 -L30 + +memcopy_default + cmp r12, #0 + beq memcopy_end + +// r10 = dest, r14 = source +memcopy_default_loop cmp r0, #0 - beq L11 - sub ip, ip, #32 - cmp ip, #31 - ldmia lr!, {r2-r9} + beq memcopy_default_non_optim + + // Optimized memcopy - Read 32 Bytes + sub r12, r12, #32 + cmp r12, #31 + ldmia r14!, {r2-r9} + + // If length is less than 32 then disable optim movls r0, #0 - cmp ip, #0 + + cmp r12, #0 + + // Optimized memcopy - Write 32 Bytes stmia r10!, {r2-r9} - bne L30 -L7 + + // while (length != 0) + bne memcopy_default_loop + +memcopy_end mov r0, r11 ldmfd sp!, {r4-r11, pc}