ArmPkg: Fix InternalMemCopyMem()

This function crashed when regions overlapped. The condition to
optimize the copy of overlapped regions was not correct.
    

ArmPkg: Add comments to InternalMemCopyMem()
    
These comments explain the flow of this assembly function.




git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@11486 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
oliviermartin 2011-03-31 12:26:20 +00:00
parent 61afd6a9e2
commit 643ec0e7c1
2 changed files with 213 additions and 95 deletions

View File

@ -43,70 +43,129 @@ GCC_ASM_EXPORT(InternalMemCopyMem)
// InternalMemCopyMem - copy a buffer, tolerating overlapping source and destination.
//
// In:    r0 = destination, r1 = source, r2 = length in bytes
// Out:   r0 = the destination pointer that was passed in (restored from r11 at memcopy_end)
// Stack: r4-r11 and lr are pushed on entry; the final ldmfd pops them and returns by loading pc
//
// Flow of the post-change code:
//   - dest < source, or dest > source with length < (dest - source): copy forward from the start
//   - dest == source, or length == 0: nothing to copy
//   - otherwise: copy backward, starting from the end of both buffers
//   - r0 is reused as the "optimized path" flag: when it is 1 the loops move 32 bytes per
//     iteration through r2-r9, when it is 0 they move one byte per iteration
//
// NOTE(review): this listing looks like a side-by-side diff collapsed onto single lines, so most
// lines carry the pre-change instruction followed by the post-change instruction - confirm
// against the repository before assembling.
ASM_PFX(InternalMemCopyMem): ASM_PFX(InternalMemCopyMem):
stmfd sp!, {r4-r11, lr} stmfd sp!, {r4-r11, lr}
tst r0, #3 // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
mov r11, r0 mov r11, r0
mov r10, r0 mov r10, r0
mov ip, r2 mov r12, r2
mov lr, r1 mov r14, r1
memcopy_check_overlapped:
cmp r11, r1
// If (dest < source)
bcc memcopy_check_optim_default
// If (dest <= source). But with the previous condition -> If (dest == source)
bls memcopy_end
// If (source + length < dest), evaluated as length < (dest - source) with r3 = dest - source
rsb r3, r1, r11
cmp r12, r3
bcc memcopy_check_optim_default
// If (length == 0)
cmp r12, #0
beq memcopy_end
b memcopy_check_optim_overlap
memcopy_check_optim_default:
// Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
tst r0, #0xF
movne r0, #0 movne r0, #0
bne L4 bne memcopy_default
tst r1, #3 tst r1, #0xF
movne r3, #0 movne r3, #0
moveq r3, #1 moveq r3, #1
cmp r2, #31 cmp r2, #31
movls r0, #0 movls r0, #0
andhi r0, r3, #1 andhi r0, r3, #1
L4: b memcopy_default
cmp r11, r1
bcc L26 memcopy_check_optim_overlap:
bls L7 // r10 = dest_end, r14 = source_end
rsb r3, r1, r11 add r10, r11, r12
cmp ip, r3 add r14, r12, r1
bcc L26
cmp ip, #0 // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
beq L7 cmp r2, #31
add r10, r11, ip
add lr, ip, r1
b L16
L29:
sub ip, ip, #8
cmp ip, #7
ldrd r2, [lr, #-8]!
movls r0, #0 movls r0, #0
cmp ip, #0 movhi r0, #1
strd r2, [r10, #-8]! tst r10, #0xF
beq L7 movne r0, #0
L16: tst r14, #0xF
cmp r0, #0 movne r0, #0
bne L29 b memcopy_overlapped
sub r3, lr, #1
sub ip, ip, #1 memcopy_overlapped_non_optim:
// We read 1 byte from the end of the source buffer
sub r3, r14, #1
sub r12, r12, #1
ldrb r3, [r3, #0] ldrb r3, [r3, #0]
sub r2, r10, #1 sub r2, r10, #1
cmp ip, #0 cmp r12, #0
// We write 1 byte at the end of the dest buffer
sub r10, r10, #1 sub r10, r10, #1
sub lr, lr, #1 sub r14, r14, #1
strb r3, [r2, #0] strb r3, [r2, #0]
bne L16 bne memcopy_overlapped_non_optim
b L7 b memcopy_end
L11:
ldrb r3, [lr], #1 // r10 = dest_end, r14 = source_end
sub ip, ip, #1 memcopy_overlapped:
strb r3, [r10], #1 // Are we in the optimized case ?
L26:
cmp ip, #0
beq L7
L30:
cmp r0, #0 cmp r0, #0
beq L11 beq memcopy_overlapped_non_optim
sub ip, ip, #32
cmp ip, #31 // Optimized Overlapped - Read 32 bytes
ldmia lr!, {r2-r9} sub r14, r14, #32
sub r12, r12, #32
cmp r12, #31
ldmia r14, {r2-r9}
// If the remaining length is less than 32 then disable optim
movls r0, #0 movls r0, #0
cmp ip, #0
cmp r12, #0
// Optimized Overlapped - Write 32 bytes
sub r10, r10, #32
stmia r10, {r2-r9}
// while (length != 0)
bne memcopy_overlapped
b memcopy_end
memcopy_default_non_optim:
// Byte copy
ldrb r3, [r14], #1
sub r12, r12, #1
strb r3, [r10], #1
memcopy_default:
cmp r12, #0
beq memcopy_end
// r10 = dest, r14 = source
memcopy_default_loop:
cmp r0, #0
beq memcopy_default_non_optim
// Optimized memcopy - Read 32 Bytes
sub r12, r12, #32
cmp r12, #31
ldmia r14!, {r2-r9}
// If the remaining length is less than 32 then disable optim
movls r0, #0
cmp r12, #0
// Optimized memcopy - Write 32 Bytes
stmia r10!, {r2-r9} stmia r10!, {r2-r9}
bne L30
L7: // while (length != 0)
bne memcopy_default_loop
memcopy_end:
// Return the destination pointer saved in r11 and restore the registers pushed on entry
mov r0, r11 mov r0, r11
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}

View File

@ -43,70 +43,129 @@ InternalMemCopyMem (
// InternalMemCopyMem - copy a buffer, tolerating overlapping source and destination.
//
// In:    r0 = destination, r1 = source, r2 = length in bytes
// Out:   r0 = the destination pointer that was passed in (restored from r11 at memcopy_end)
// Stack: r4-r11 and lr are pushed on entry; the final ldmfd pops them and returns by loading pc
//
// Flow of the post-change code:
//   - dest < source, or dest > source with length < (dest - source): copy forward from the start
//   - dest == source, or length == 0: nothing to copy
//   - otherwise: copy backward, starting from the end of both buffers
//   - r0 is reused as the "optimized path" flag: when it is 1 the loops move 32 bytes per
//     iteration through r2-r9, when it is 0 they move one byte per iteration
//
// NOTE(review): this listing looks like a side-by-side diff collapsed onto single lines, so most
// lines carry the pre-change instruction followed by the post-change instruction - confirm
// against the repository before assembling.
InternalMemCopyMem InternalMemCopyMem
stmfd sp!, {r4-r11, lr} stmfd sp!, {r4-r11, lr}
tst r0, #3 // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
mov r11, r0 mov r11, r0
mov r10, r0 mov r10, r0
mov ip, r2 mov r12, r2
mov lr, r1 mov r14, r1
memcopy_check_overlapped
cmp r11, r1
// If (dest < source)
bcc memcopy_check_optim_default
// If (dest <= source). But with the previous condition -> If (dest == source)
bls memcopy_end
// If (source + length < dest), evaluated as length < (dest - source) with r3 = dest - source
rsb r3, r1, r11
cmp r12, r3
bcc memcopy_check_optim_default
// If (length == 0)
cmp r12, #0
beq memcopy_end
b memcopy_check_optim_overlap
memcopy_check_optim_default
// Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
tst r0, #0xF
movne r0, #0 movne r0, #0
bne L4 bne memcopy_default
tst r1, #3 tst r1, #0xF
movne r3, #0 movne r3, #0
moveq r3, #1 moveq r3, #1
cmp r2, #31 cmp r2, #31
movls r0, #0 movls r0, #0
andhi r0, r3, #1 andhi r0, r3, #1
L4 b memcopy_default
cmp r11, r1
bcc L26 memcopy_check_optim_overlap
bls L7 // r10 = dest_end, r14 = source_end
rsb r3, r1, r11 add r10, r11, r12
cmp ip, r3 add r14, r12, r1
bcc L26
cmp ip, #0 // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
beq L7 cmp r2, #31
add r10, r11, ip
add lr, ip, r1
b L16
L29
sub ip, ip, #8
cmp ip, #7
ldrd r2, [lr, #-8]!
movls r0, #0 movls r0, #0
cmp ip, #0 movhi r0, #1
strd r2, [r10, #-8]! tst r10, #0xF
beq L7 movne r0, #0
L16 tst r14, #0xF
cmp r0, #0 movne r0, #0
bne L29 b memcopy_overlapped
sub r3, lr, #1
sub ip, ip, #1 memcopy_overlapped_non_optim
// We read 1 byte from the end of the source buffer
sub r3, r14, #1
sub r12, r12, #1
ldrb r3, [r3, #0] ldrb r3, [r3, #0]
sub r2, r10, #1 sub r2, r10, #1
cmp ip, #0 cmp r12, #0
// We write 1 byte at the end of the dest buffer
sub r10, r10, #1 sub r10, r10, #1
sub lr, lr, #1 sub r14, r14, #1
strb r3, [r2, #0] strb r3, [r2, #0]
bne L16 bne memcopy_overlapped_non_optim
b L7 b memcopy_end
L11
ldrb r3, [lr], #1 // r10 = dest_end, r14 = source_end
sub ip, ip, #1 memcopy_overlapped
strb r3, [r10], #1 // Are we in the optimized case ?
L26
cmp ip, #0
beq L7
L30
cmp r0, #0 cmp r0, #0
beq L11 beq memcopy_overlapped_non_optim
sub ip, ip, #32
cmp ip, #31 // Optimized Overlapped - Read 32 bytes
ldmia lr!, {r2-r9} sub r14, r14, #32
sub r12, r12, #32
cmp r12, #31
ldmia r14, {r2-r9}
// If the remaining length is less than 32 then disable optim
movls r0, #0 movls r0, #0
cmp ip, #0
cmp r12, #0
// Optimized Overlapped - Write 32 bytes
sub r10, r10, #32
stmia r10, {r2-r9}
// while (length != 0)
bne memcopy_overlapped
b memcopy_end
memcopy_default_non_optim
// Byte copy
ldrb r3, [r14], #1
sub r12, r12, #1
strb r3, [r10], #1
memcopy_default
cmp r12, #0
beq memcopy_end
// r10 = dest, r14 = source
memcopy_default_loop
cmp r0, #0
beq memcopy_default_non_optim
// Optimized memcopy - Read 32 Bytes
sub r12, r12, #32
cmp r12, #31
ldmia r14!, {r2-r9}
// If the remaining length is less than 32 then disable optim
movls r0, #0
cmp r12, #0
// Optimized memcopy - Write 32 Bytes
stmia r10!, {r2-r9} stmia r10!, {r2-r9}
bne L30
L7 // while (length != 0)
bne memcopy_default_loop
memcopy_end
// Return the destination pointer saved in r11 and restore the registers pushed on entry
mov r0, r11 mov r0, r11
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r11, pc}