From 026a82abf0bd6268d32f4559dbede00715264f74 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 26 Jul 2016 16:37:37 +0200 Subject: [PATCH] BaseTools/GenFw AARCH64: convert ADRP to ADR instructions if binary size allows it The ADRP instruction in the AArch64 ISA requires the link time and load time offsets of a binary to be equal modulo 4 KB. The reason is that this instruction always produces a multiple of 4 KB, and relies on a subsequent ADD or LDR instruction to set the offset into the page. The resulting symbol reference only produces the correct value if the symbol in question resides at that exact offset into the page, and so loading the binary at arbitrary offsets is not possible. Due to the various levels of padding when packing FVs into FVs into FDs, this alignment is very costly for XIP code, and so we would like to relax this alignment requirement if possible. Given that symbols that are sufficiently close (within 1 MB) of the reference can also be reached using an ADR instruction which does not suffer from this alignment issue, let's replace ADRP instructions with ADR after linking if the offset can be encoded in this instruction's immediate field. Note that this only makes sense if the section alignment is < 4 KB. Otherwise, replacing the ADRP has no benefit, considering that the subsequent ADD or LDR instruction is retained, and that micro-architectures are more likely to be optimized for ADRP/ADD pairs (i.e., via micro op fusing) than for ADR/ADD pairs, which are non-typical. Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm --- BaseTools/Source/C/GenFw/Elf64Convert.c | 59 +++++++++++++++++++------ 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c index 024a2a0d53..944c94b8f8 100644 --- a/BaseTools/Source/C/GenFw/Elf64Convert.c +++ b/BaseTools/Source/C/GenFw/Elf64Convert.c @@ -806,26 +806,59 @@ WriteSections64 ( switch (ELF_R_TYPE(Rel->r_info)) { case R_AARCH64_ADR_PREL_PG_HI21: + // + // AArch64 PG_H21 relocations are typically paired with ABS_LO12 + // relocations, where a PC-relative reference with +/- 4 GB range is + // split into a relative high part and an absolute low part. Since + // the absolute low part represents the offset into a 4 KB page, we + // either have to convert the ADRP into an ADR instruction, or we + // need to use a section alignment of at least 4 KB, so that the + // binary appears at a correct offset at runtime. In any case, we + // have to make sure that the 4 KB relative offsets of both the + // section containing the reference as well as the section to which + // it refers have not been changed during PE/COFF conversion (i.e., + // in ScanSections64() above). + // + if (mCoffAlignment < 0x1000) { + // + // Attempt to convert the ADRP into an ADR instruction. + // This is only possible if the symbol is within +/- 1 MB. + // + INT64 Offset; + + // Decode the ADRP instruction + Offset = (INT32)((*(UINT32 *)Targ & 0xffffe0) << 8); + Offset = (Offset << (6 - 5)) | ((*(UINT32 *)Targ & 0x60000000) >> (29 - 12)); + + // + // ADRP offset is relative to the previous page boundary, + // whereas ADR offset is relative to the instruction itself. + // So fix up the offset so it points to the page containing + // the symbol. + // + Offset -= (UINTN)(Targ - mCoffFile) & 0xfff; + + if (Offset < -0x100000 || Offset > 0xfffff) { + Error (NULL, 0, 3000, "Invalid", "WriteSections64(): %s due to its size (> 1 MB), this module requires 4 KB section alignment.", + mInImageName); + break; + } + + // Re-encode the offset as an ADR instruction + *(UINT32 *)Targ &= 0x1000001f; + *(UINT32 *)Targ |= ((Offset & 0x1ffffc) << (5 - 2)) | ((Offset & 0x3) << 29); + } + /* fall through */ + case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: case R_AARCH64_LDST16_ABS_LO12_NC: case R_AARCH64_LDST32_ABS_LO12_NC: case R_AARCH64_LDST64_ABS_LO12_NC: case R_AARCH64_LDST128_ABS_LO12_NC: - // - // AArch64 PG_H21 relocations are typically paired with ABS_LO12 - // relocations, where a PC-relative reference with +/- 4 GB range is - // split into a relative high part and an absolute low part. Since - // the absolute low part represents the offset into a 4 KB page, we - // have to make sure that the 4 KB relative offsets of both the - // section containing the reference as well as the section to which - // it refers have not been changed during PE/COFF conversion (i.e., - // in ScanSections64() above). - // if (((SecShdr->sh_addr ^ SecOffset) & 0xfff) != 0 || - ((SymShdr->sh_addr ^ mCoffSectionsOffset[Sym->st_shndx]) & 0xfff) != 0 || - mCoffAlignment < 0x1000) { - Error (NULL, 0, 3000, "Invalid", "WriteSections64(): %s AARCH64 small code model requires 4 KB section alignment.", + ((SymShdr->sh_addr ^ mCoffSectionsOffset[Sym->st_shndx]) & 0xfff) != 0) { + Error (NULL, 0, 3000, "Invalid", "WriteSections64(): %s AARCH64 small code model requires identical ELF and PE/COFF section offsets modulo 4 KB.", mInImageName); break; }