mirror of https://github.com/acidanthera/audk.git
ArmPkg: Fix InternalMemCopyMem()
This function crashed when regions overlapped. The condition to optimize the copy of overlapped regions was not correct. ArmPkg: Add comments to InternalMemCopyMem(). These comments explain the flow of this assembly function. git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@11486 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
parent
61afd6a9e2
commit
643ec0e7c1
|
@ -43,70 +43,129 @@ GCC_ASM_EXPORT(InternalMemCopyMem)
|
||||||
|
|
||||||
ASM_PFX(InternalMemCopyMem):
|
ASM_PFX(InternalMemCopyMem):
|
||||||
stmfd sp!, {r4-r11, lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
tst r0, #3
|
// Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
|
||||||
mov r11, r0
|
mov r11, r0
|
||||||
mov r10, r0
|
mov r10, r0
|
||||||
mov ip, r2
|
mov r12, r2
|
||||||
mov lr, r1
|
mov r14, r1
|
||||||
|
|
||||||
|
memcopy_check_overlapped:
|
||||||
|
cmp r11, r1
|
||||||
|
// If (dest < source)
|
||||||
|
bcc memcopy_check_optim_default
|
||||||
|
// If (dest <= source). But with the previous condition -> If (dest == source)
|
||||||
|
bls memcopy_end
|
||||||
|
|
||||||
|
// If (source + length < dest)
|
||||||
|
rsb r3, r1, r11
|
||||||
|
cmp r12, r3
|
||||||
|
bcc memcopy_check_optim_default
|
||||||
|
|
||||||
|
// If (length == 0)
|
||||||
|
cmp r12, #0
|
||||||
|
beq memcopy_end
|
||||||
|
|
||||||
|
b memcopy_check_optim_overlap
|
||||||
|
|
||||||
|
memcopy_check_optim_default:
|
||||||
|
// Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1)
|
||||||
|
tst r0, #0xF
|
||||||
movne r0, #0
|
movne r0, #0
|
||||||
bne L4
|
bne memcopy_default
|
||||||
tst r1, #3
|
tst r1, #0xF
|
||||||
movne r3, #0
|
movne r3, #0
|
||||||
moveq r3, #1
|
moveq r3, #1
|
||||||
cmp r2, #31
|
cmp r2, #31
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
andhi r0, r3, #1
|
andhi r0, r3, #1
|
||||||
L4:
|
b memcopy_default
|
||||||
cmp r11, r1
|
|
||||||
bcc L26
|
memcopy_check_optim_overlap:
|
||||||
bls L7
|
// r10 = dest_end, r14 = source_end
|
||||||
rsb r3, r1, r11
|
add r10, r11, r12
|
||||||
cmp ip, r3
|
add r14, r12, r1
|
||||||
bcc L26
|
|
||||||
cmp ip, #0
|
// Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned)
|
||||||
beq L7
|
cmp r2, #31
|
||||||
add r10, r11, ip
|
|
||||||
add lr, ip, r1
|
|
||||||
b L16
|
|
||||||
L29:
|
|
||||||
sub ip, ip, #8
|
|
||||||
cmp ip, #7
|
|
||||||
ldrd r2, [lr, #-8]!
|
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
cmp ip, #0
|
movhi r0, #1
|
||||||
strd r2, [r10, #-8]!
|
tst r10, #0xF
|
||||||
beq L7
|
movne r0, #0
|
||||||
L16:
|
tst r14, #0xF
|
||||||
cmp r0, #0
|
movne r0, #0
|
||||||
bne L29
|
b memcopy_overlapped
|
||||||
sub r3, lr, #1
|
|
||||||
sub ip, ip, #1
|
memcopy_overlapped_non_optim:
|
||||||
|
// We read 1 byte from the end of the source buffer
|
||||||
|
sub r3, r14, #1
|
||||||
|
sub r12, r12, #1
|
||||||
ldrb r3, [r3, #0]
|
ldrb r3, [r3, #0]
|
||||||
sub r2, r10, #1
|
sub r2, r10, #1
|
||||||
cmp ip, #0
|
cmp r12, #0
|
||||||
|
// We write 1 byte at the end of the dest buffer
|
||||||
sub r10, r10, #1
|
sub r10, r10, #1
|
||||||
sub lr, lr, #1
|
sub r14, r14, #1
|
||||||
strb r3, [r2, #0]
|
strb r3, [r2, #0]
|
||||||
bne L16
|
bne memcopy_overlapped_non_optim
|
||||||
b L7
|
b memcopy_end
|
||||||
L11:
|
|
||||||
ldrb r3, [lr], #1
|
// r10 = dest_end, r14 = source_end
|
||||||
sub ip, ip, #1
|
memcopy_overlapped:
|
||||||
strb r3, [r10], #1
|
// Are we in the optimized case ?
|
||||||
L26:
|
|
||||||
cmp ip, #0
|
|
||||||
beq L7
|
|
||||||
L30:
|
|
||||||
cmp r0, #0
|
cmp r0, #0
|
||||||
beq L11
|
beq memcopy_overlapped_non_optim
|
||||||
sub ip, ip, #32
|
|
||||||
cmp ip, #31
|
// Optimized Overlapped - Read 32 bytes
|
||||||
ldmia lr!, {r2-r9}
|
sub r14, r14, #32
|
||||||
|
sub r12, r12, #32
|
||||||
|
cmp r12, #31
|
||||||
|
ldmia r14, {r2-r9}
|
||||||
|
|
||||||
|
// If length is less than 32 then disable optim
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
cmp ip, #0
|
|
||||||
|
cmp r12, #0
|
||||||
|
|
||||||
|
// Optimized Overlapped - Write 32 bytes
|
||||||
|
sub r10, r10, #32
|
||||||
|
stmia r10, {r2-r9}
|
||||||
|
|
||||||
|
// while (length != 0)
|
||||||
|
bne memcopy_overlapped
|
||||||
|
b memcopy_end
|
||||||
|
|
||||||
|
memcopy_default_non_optim:
|
||||||
|
// Byte copy
|
||||||
|
ldrb r3, [r14], #1
|
||||||
|
sub r12, r12, #1
|
||||||
|
strb r3, [r10], #1
|
||||||
|
|
||||||
|
memcopy_default:
|
||||||
|
cmp r12, #0
|
||||||
|
beq memcopy_end
|
||||||
|
|
||||||
|
// r10 = dest, r14 = source
|
||||||
|
memcopy_default_loop:
|
||||||
|
cmp r0, #0
|
||||||
|
beq memcopy_default_non_optim
|
||||||
|
|
||||||
|
// Optimized memcopy - Read 32 Bytes
|
||||||
|
sub r12, r12, #32
|
||||||
|
cmp r12, #31
|
||||||
|
ldmia r14!, {r2-r9}
|
||||||
|
|
||||||
|
// If length is less than 32 then disable optim
|
||||||
|
movls r0, #0
|
||||||
|
|
||||||
|
cmp r12, #0
|
||||||
|
|
||||||
|
// Optimized memcopy - Write 32 Bytes
|
||||||
stmia r10!, {r2-r9}
|
stmia r10!, {r2-r9}
|
||||||
bne L30
|
|
||||||
L7:
|
// while (length != 0)
|
||||||
|
bne memcopy_default_loop
|
||||||
|
|
||||||
|
memcopy_end:
|
||||||
mov r0, r11
|
mov r0, r11
|
||||||
ldmfd sp!, {r4-r11, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
|
|
||||||
|
|
|
@ -43,70 +43,129 @@ InternalMemCopyMem (
|
||||||
|
|
||||||
InternalMemCopyMem
|
InternalMemCopyMem
|
||||||
stmfd sp!, {r4-r11, lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
tst r0, #3
|
// Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
|
||||||
mov r11, r0
|
mov r11, r0
|
||||||
mov r10, r0
|
mov r10, r0
|
||||||
mov ip, r2
|
mov r12, r2
|
||||||
mov lr, r1
|
mov r14, r1
|
||||||
|
|
||||||
|
memcopy_check_overlapped
|
||||||
|
cmp r11, r1
|
||||||
|
// If (dest < source)
|
||||||
|
bcc memcopy_check_optim_default
|
||||||
|
// If (dest <= source). But with the previous condition -> If (dest == source)
|
||||||
|
bls memcopy_end
|
||||||
|
|
||||||
|
// If (source + length < dest)
|
||||||
|
rsb r3, r1, r11
|
||||||
|
cmp r12, r3
|
||||||
|
bcc memcopy_check_optim_default
|
||||||
|
|
||||||
|
// If (length == 0)
|
||||||
|
cmp r12, #0
|
||||||
|
beq memcopy_end
|
||||||
|
|
||||||
|
b memcopy_check_optim_overlap
|
||||||
|
|
||||||
|
memcopy_check_optim_default
|
||||||
|
// Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1)
|
||||||
|
tst r0, #0xF
|
||||||
movne r0, #0
|
movne r0, #0
|
||||||
bne L4
|
bne memcopy_default
|
||||||
tst r1, #3
|
tst r1, #0xF
|
||||||
movne r3, #0
|
movne r3, #0
|
||||||
moveq r3, #1
|
moveq r3, #1
|
||||||
cmp r2, #31
|
cmp r2, #31
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
andhi r0, r3, #1
|
andhi r0, r3, #1
|
||||||
L4
|
b memcopy_default
|
||||||
cmp r11, r1
|
|
||||||
bcc L26
|
memcopy_check_optim_overlap
|
||||||
bls L7
|
// r10 = dest_end, r14 = source_end
|
||||||
rsb r3, r1, r11
|
add r10, r11, r12
|
||||||
cmp ip, r3
|
add r14, r12, r1
|
||||||
bcc L26
|
|
||||||
cmp ip, #0
|
// Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned)
|
||||||
beq L7
|
cmp r2, #31
|
||||||
add r10, r11, ip
|
|
||||||
add lr, ip, r1
|
|
||||||
b L16
|
|
||||||
L29
|
|
||||||
sub ip, ip, #8
|
|
||||||
cmp ip, #7
|
|
||||||
ldrd r2, [lr, #-8]!
|
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
cmp ip, #0
|
movhi r0, #1
|
||||||
strd r2, [r10, #-8]!
|
tst r10, #0xF
|
||||||
beq L7
|
movne r0, #0
|
||||||
L16
|
tst r14, #0xF
|
||||||
cmp r0, #0
|
movne r0, #0
|
||||||
bne L29
|
b memcopy_overlapped
|
||||||
sub r3, lr, #1
|
|
||||||
sub ip, ip, #1
|
memcopy_overlapped_non_optim
|
||||||
|
// We read 1 byte from the end of the source buffer
|
||||||
|
sub r3, r14, #1
|
||||||
|
sub r12, r12, #1
|
||||||
ldrb r3, [r3, #0]
|
ldrb r3, [r3, #0]
|
||||||
sub r2, r10, #1
|
sub r2, r10, #1
|
||||||
cmp ip, #0
|
cmp r12, #0
|
||||||
|
// We write 1 byte at the end of the dest buffer
|
||||||
sub r10, r10, #1
|
sub r10, r10, #1
|
||||||
sub lr, lr, #1
|
sub r14, r14, #1
|
||||||
strb r3, [r2, #0]
|
strb r3, [r2, #0]
|
||||||
bne L16
|
bne memcopy_overlapped_non_optim
|
||||||
b L7
|
b memcopy_end
|
||||||
L11
|
|
||||||
ldrb r3, [lr], #1
|
// r10 = dest_end, r14 = source_end
|
||||||
sub ip, ip, #1
|
memcopy_overlapped
|
||||||
strb r3, [r10], #1
|
// Are we in the optimized case ?
|
||||||
L26
|
|
||||||
cmp ip, #0
|
|
||||||
beq L7
|
|
||||||
L30
|
|
||||||
cmp r0, #0
|
cmp r0, #0
|
||||||
beq L11
|
beq memcopy_overlapped_non_optim
|
||||||
sub ip, ip, #32
|
|
||||||
cmp ip, #31
|
// Optimized Overlapped - Read 32 bytes
|
||||||
ldmia lr!, {r2-r9}
|
sub r14, r14, #32
|
||||||
|
sub r12, r12, #32
|
||||||
|
cmp r12, #31
|
||||||
|
ldmia r14, {r2-r9}
|
||||||
|
|
||||||
|
// If length is less than 32 then disable optim
|
||||||
movls r0, #0
|
movls r0, #0
|
||||||
cmp ip, #0
|
|
||||||
|
cmp r12, #0
|
||||||
|
|
||||||
|
// Optimized Overlapped - Write 32 bytes
|
||||||
|
sub r10, r10, #32
|
||||||
|
stmia r10, {r2-r9}
|
||||||
|
|
||||||
|
// while (length != 0)
|
||||||
|
bne memcopy_overlapped
|
||||||
|
b memcopy_end
|
||||||
|
|
||||||
|
memcopy_default_non_optim
|
||||||
|
// Byte copy
|
||||||
|
ldrb r3, [r14], #1
|
||||||
|
sub r12, r12, #1
|
||||||
|
strb r3, [r10], #1
|
||||||
|
|
||||||
|
memcopy_default
|
||||||
|
cmp r12, #0
|
||||||
|
beq memcopy_end
|
||||||
|
|
||||||
|
// r10 = dest, r14 = source
|
||||||
|
memcopy_default_loop
|
||||||
|
cmp r0, #0
|
||||||
|
beq memcopy_default_non_optim
|
||||||
|
|
||||||
|
// Optimized memcopy - Read 32 Bytes
|
||||||
|
sub r12, r12, #32
|
||||||
|
cmp r12, #31
|
||||||
|
ldmia r14!, {r2-r9}
|
||||||
|
|
||||||
|
// If length is less than 32 then disable optim
|
||||||
|
movls r0, #0
|
||||||
|
|
||||||
|
cmp r12, #0
|
||||||
|
|
||||||
|
// Optimized memcopy - Write 32 Bytes
|
||||||
stmia r10!, {r2-r9}
|
stmia r10!, {r2-r9}
|
||||||
bne L30
|
|
||||||
L7
|
// while (length != 0)
|
||||||
|
bne memcopy_default_loop
|
||||||
|
|
||||||
|
memcopy_end
|
||||||
mov r0, r11
|
mov r0, r11
|
||||||
ldmfd sp!, {r4-r11, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue