2014-08-19 15:29:52 +02:00
|
|
|
#------------------------------------------------------------------------------
|
2010-04-22 00:04:35 +02:00
|
|
|
#
|
|
|
|
# CopyMem() worker for ARM
|
|
|
|
#
|
|
|
|
# This file started out as C code that did 64 bit moves if the buffer was
|
|
|
|
# 32-bit aligned, else it does a byte copy. It also does a byte copy for
|
2014-08-19 15:29:52 +02:00
|
|
|
# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
|
2010-04-22 00:04:35 +02:00
|
|
|
#
|
2010-04-29 14:15:47 +02:00
|
|
|
# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
|
|
|
|
# This program and the accompanying materials
|
2010-04-22 00:04:35 +02:00
|
|
|
# are licensed and made available under the terms and conditions of the BSD License
|
|
|
|
# which accompanies this distribution. The full text of the license may be found at
|
|
|
|
# http://opensource.org/licenses/bsd-license.php
|
|
|
|
#
|
|
|
|
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
|
|
|
#
|
|
|
|
#------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
/**
|
|
|
|
Copy Length bytes from Source to Destination. Overlap is OK.
|
|
|
|
|
2014-08-19 15:29:52 +02:00
|
|
|
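This implementation copies 32 bytes at a time with ldm/stm while at least 32 bytes remain and the buffers pass its 16-byte alignment check; otherwise it copies one byte at a time.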
|
2010-04-22 00:04:35 +02:00
|
|
|
|
|
|
|
@param Destination Target of copy
|
|
|
|
@param Source Place to copy from
|
|
|
|
@param Length Number of bytes to copy
|
|
|
|
|
|
|
|
@return Destination
|
|
|
|
|
|
|
|
|
|
|
|
VOID *
|
|
|
|
EFIAPI
|
|
|
|
InternalMemCopyMem (
|
|
|
|
OUT VOID *DestinationBuffer,
|
|
|
|
IN CONST VOID *SourceBuffer,
|
|
|
|
IN UINTN Length
|
|
|
|
)
|
|
|
|
**/
|
|
|
|
.text
|
|
|
|
.align 2
|
2011-02-02 23:35:30 +01:00
|
|
|
GCC_ASM_EXPORT(InternalMemCopyMem)
|
2010-04-22 00:04:35 +02:00
|
|
|
|
|
|
|
ASM_PFX(InternalMemCopyMem):
  // In:   r0 = DestinationBuffer, r1 = SourceBuffer, r2 = Length (bytes)
  // Out:  r0 = DestinationBuffer (unchanged input value)
  // Clobbers r1-r3, r12 and the flags; r4-r11 are saved here and restored on exit.
  //
  // Register roles for the whole routine:
  //   r11      = original destination, kept untouched for the return value
  //   r10      = destination cursor
  //   r14 (lr) = source cursor (the return address lives on the stack meanwhile)
  //   r12      = number of bytes still to copy
  //   r0       = 1 while the 32-byte ldm/stm path may be used, 0 for byte copy
  //              (valid once one of the memcopy_check_optim_* blocks has run)
  //   r2-r9    = data registers for one 32-byte block, r3 = byte-copy scratch
  stmfd   sp!, {r4-r11, lr}
  mov     r11, r0
  mov     r10, r0
  mov     r12, r2
  mov     r14, r1

memcopy_check_overlapped:
  cmp     r11, r1
  // If (dest < source), unsigned: a forward copy cannot overwrite unread source bytes
  bcc     memcopy_check_optim_default
  // If (dest == source): nothing has to be moved
  beq     memcopy_end

  // From here on dest > source, so r3 = dest - source is at least 1.
  // If (length < dest - source) the source ends before dest starts: copy forward.
  // A zero length always takes this branch and memcopy_default then returns at once,
  // so no separate zero-length test is needed on the fall-through path.
  rsb     r3, r1, r11
  cmp     r12, r3
  bcc     memcopy_check_optim_default

  // length >= dest - source >= 1: copy backward, starting from the buffer ends
  b       memcopy_check_optim_overlap

memcopy_check_optim_default:
  // Forward copy. Select the optimized path (r0 = 1) only when
  // (length >= 32) && destination 16-byte aligned && source 16-byte aligned.
  tst     r0, #0xF
  movne   r0, #0
  bne     memcopy_default
  // r3 = 1 if the source is 16-byte aligned, else 0
  tst     r1, #0xF
  movne   r3, #0
  moveq   r3, #1
  // r0 = (length > 31) ? r3 : 0
  cmp     r2, #31
  movls   r0, #0
  andhi   r0, r3, #1
  b       memcopy_default

memcopy_check_optim_overlap:
  // Backward copy: r10 = dest_end, r14 = source_end (one past the last byte)
  add     r10, r11, r12
  add     r14, r12, r1

  // Select the optimized path (r0 = 1) only when
  // (length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned.
  cmp     r2, #31
  movls   r0, #0
  movhi   r0, #1
  tst     r10, #0xF
  movne   r0, #0
  tst     r14, #0xF
  movne   r0, #0
  b       memcopy_overlapped

memcopy_overlapped_non_optim:
  // Backward byte copy; entered only with r12 > 0.
  // Read 1 byte from just below source_end
  sub     r3, r14, #1
  sub     r12, r12, #1
  ldrb    r3, [r3, #0]
  sub     r2, r10, #1
  // Z is set here and consumed by the bne below; nothing in between touches the flags
  cmp     r12, #0
  // Step both end pointers down and write the byte just below the old dest_end
  sub     r10, r10, #1
  sub     r14, r14, #1
  strb    r3, [r2, #0]
  bne     memcopy_overlapped_non_optim
  b       memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped:
  // Optimized case still allowed?
  cmp     r0, #0
  beq     memcopy_overlapped_non_optim

  // Optimized overlapped copy - read the 32 bytes below source_end
  sub     r14, r14, #32
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14, {r2-r9}

  // Fewer than 32 bytes left: the remainder is copied byte by byte
  movls   r0, #0

  // Z is set here and consumed by the bne below (sub/stmia leave the flags alone)
  cmp     r12, #0

  // Optimized overlapped copy - write the 32 bytes below dest_end
  sub     r10, r10, #32
  stmia   r10, {r2-r9}

  // while (length != 0)
  bne     memcopy_overlapped
  b       memcopy_end

memcopy_default_non_optim:
  // Forward byte copy with post-incremented cursors; falls through to the length test
  ldrb    r3, [r14], #1
  sub     r12, r12, #1
  strb    r3, [r10], #1

memcopy_default:
  // Done when no bytes remain (this also handles Length == 0 on entry)
  cmp     r12, #0
  beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop:
  cmp     r0, #0
  beq     memcopy_default_non_optim

  // Optimized memcopy - read 32 bytes and advance the source cursor
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14!, {r2-r9}

  // Fewer than 32 bytes left: the remainder is copied byte by byte
  movls   r0, #0

  // Z is set here and consumed by the bne below (stmia leaves the flags alone)
  cmp     r12, #0

  // Optimized memcopy - write 32 bytes and advance the destination cursor
  stmia   r10!, {r2-r9}

  // while (length != 0)
  bne     memcopy_default_loop

memcopy_end:
  // Return the original destination pointer; popping into pc returns to the caller
  mov     r0, r11
  ldmfd   sp!, {r4-r11, pc}
|
2014-08-19 15:29:52 +02:00
|
|
|
|