From 2069a63a8e4b190a6992b45060c0dd0d5a5f3c73 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 26 Jun 2024 13:26:08 +0200 Subject: [PATCH] OvmfPkg/PlatformInitLib: allow switching to 4-level paging There are a number of mostly older guests such as RHEL-7 which do not support 5-level paging. This patch adds support for switching from 5-level paging mode back to 4-level paging mode. This is done in PEI, after inspecting the address space needed (installed memory and reservations configured via fw_cfg). By default small guests (which need less than 1 TB) will use 4-level paging mode. There is a fw_cfg override though, so it is possible to force the one or the other this way: qemu-system-x86_64 -fw_cfg name=opt/org.tianocode/PagingLevel,string=5 Signed-off-by: Gerd Hoffmann --- OvmfPkg/Library/PlatformInitLib/MemDetect.c | 107 ++++++++++++++++++ .../PlatformInitLib/PlatformInitLib.inf | 2 + .../Library/PlatformInitLib/X64/Paging.nasm | 76 +++++++++++++ 3 files changed, 185 insertions(+) create mode 100644 OvmfPkg/Library/PlatformInitLib/X64/Paging.nasm diff --git a/OvmfPkg/Library/PlatformInitLib/MemDetect.c b/OvmfPkg/Library/PlatformInitLib/MemDetect.c index bd6c79e4e4..0acc0e1275 100644 --- a/OvmfPkg/Library/PlatformInitLib/MemDetect.c +++ b/OvmfPkg/Library/PlatformInitLib/MemDetect.c @@ -898,6 +898,111 @@ PlatformScanHostProvided64BitPciMmioEnd ( return EFI_NOT_FOUND; } +VOID +EFIAPI +Switch4Level ( + VOID + ); + +/** + Configure x64 paging levels. + + + The OVMF ResetVector code will enter long mode with 5-level paging if the + following conditions are true: + + (1) OVMF has been built with PcdUse5LevelPageTable = TRUE, and + (2) the CPU supports 5-level paging (aka la57), and + (3) the CPU supports gigabyte pages, and + (4) the VM is not running in SEV mode. + + Condition (4) is a temporary stopgap for BaseMemEncryptSevLib not supporting + 5-level paging yet. 
+ + + This function looks at the virtual machine configuration, then decides + whether it will continue to use 5-level paging or downgrade to 4-level + paging for better compatibility with older guest OS versions. + + There is a fw_cfg config option to explicitly request 4 or 5-level paging + using 'qemu -fw_cfg name=opt/org.tianocode/PagingLevel,string=4|5'. If the + option is present the requested paging level will be used. + + If the option is absent the function checks the size of the address space + needed, which is the RAM installed plus fw_cfg reservations. The downgrade + to 4-level paging will happen for small guests where the address space needed + is lower than 1TB (PlatformInfoHob->FirstNonAddress < 2^40). + + + This function will also log the paging level used and the reason for that. +**/ +STATIC +VOID +PlatformSetupPagingLevel ( + IN OUT EFI_HOB_PLATFORM_INFO *PlatformInfoHob + ) +{ + #ifdef MDE_CPU_X64 + UINT32 PagingLevel; + EFI_STATUS Status; + IA32_CR4 Cr4; + + Cr4.UintN = AsmReadCr4 (); + if (!Cr4.Bits.LA57) { + /* The OvmfPkg ResetVector has NOT turned on 5-level paging, log the reason. */ + if (!PcdGetBool (PcdUse5LevelPageTable)) { + DEBUG ((DEBUG_INFO, "%a: using 4-level paging (PcdUse5LevelPageTable disabled)\n", __func__)); + } else { + DEBUG ((DEBUG_INFO, "%a: using 4-level paging (la57 not supported by cpu)\n", __func__)); + } + + return; + } + + Status = QemuFwCfgParseUint32 ( + "opt/org.tianocode/PagingLevel", + FALSE, + &PagingLevel + ); + switch (Status) { + case EFI_NOT_FOUND: + if (PlatformInfoHob->FirstNonAddress < (1ll << 40)) { + // + // If the highest address actually used is below 1TB switch back into + // 4-level paging mode for better compatibility with older guests. 
+ // + DEBUG ((DEBUG_INFO, "%a: using 4-level paging (default for small guest)\n", __func__)); + PagingLevel = 4; + } else { + DEBUG ((DEBUG_INFO, "%a: using 5-level paging (default for large guest)\n", __func__)); + PagingLevel = 5; + } + + break; + case EFI_SUCCESS: + if ((PagingLevel != 4) && (PagingLevel != 5)) { + DEBUG ((DEBUG_INFO, "%a: invalid paging level in fw_cfg: %d\n", __func__, PagingLevel)); + return; + } + + DEBUG ((DEBUG_INFO, "%a: using %d-level paging (fw_cfg override)\n", __func__, PagingLevel)); + break; + default: + DEBUG ((DEBUG_WARN, "%a: QemuFwCfgParseUint32: %r\n", __func__, Status)); + return; + } + + if (PagingLevel == 4) { + Switch4Level (); + } + + if (PagingLevel == 5) { + /* The OvmfPkg ResetVector has turned on 5-level paging, nothing to do here. */ + } + + #endif +} + /** Initialize the PhysMemAddressWidth field in PlatformInfoHob based on guest RAM size. **/ @@ -946,6 +1051,8 @@ PlatformAddressWidthInitialization ( PlatformGetFirstNonAddress (PlatformInfoHob); } + PlatformSetupPagingLevel (PlatformInfoHob); + PlatformAddressWidthFromCpuid (PlatformInfoHob, TRUE); if (PlatformInfoHob->PhysMemAddressWidth != 0) { // physical address width is known diff --git a/OvmfPkg/Library/PlatformInitLib/PlatformInitLib.inf b/OvmfPkg/Library/PlatformInitLib/PlatformInitLib.inf index 21e6efa5e0..e9c07467bb 100644 --- a/OvmfPkg/Library/PlatformInitLib/PlatformInitLib.inf +++ b/OvmfPkg/Library/PlatformInitLib/PlatformInitLib.inf @@ -32,6 +32,7 @@ [Sources.X64] IntelTdx.c + X64/Paging.nasm [Packages] EmbeddedPkg/EmbeddedPkg.dec @@ -63,6 +64,7 @@ [Pcd] gEfiMdePkgTokenSpaceGuid.PcdPciExpressBaseAddress gEfiMdeModulePkgTokenSpaceGuid.PcdUse1GPageTable + gEfiMdeModulePkgTokenSpaceGuid.PcdUse5LevelPageTable [FixedPcd] gUefiOvmfPkgTokenSpaceGuid.PcdOvmfWorkAreaBase diff --git a/OvmfPkg/Library/PlatformInitLib/X64/Paging.nasm b/OvmfPkg/Library/PlatformInitLib/X64/Paging.nasm new file mode 100644 index 0000000000..895a80950e --- /dev/null +++ 
b/OvmfPkg/Library/PlatformInitLib/X64/Paging.nasm @@ -0,0 +1,76 @@ +;------------------------------------------------------------------------------ +; @file +; +; Switch from 5-level paging mode to 4-level paging mode. +; +; This assumes everything (code, stack, page tables) is in 32-bit +; address space, which is true for PEI phase even in X64 builds +; because low memory is used for early firmware setup. +; +; This also assumes the standard ResetVector GDT is active. +; +; SPDX-License-Identifier: BSD-2-Clause-Patent +;------------------------------------------------------------------------------ + +SECTION .text +BITS 64 + +global ASM_PFX(Switch4Level) +ASM_PFX(Switch4Level): + + ; save regs (restored in Switch4Level64) + push rax + push rbx + push rcx + push rdx + + ; cs:ip for long mode (consumed by the final retf in Switch4Level32) + lea rax, [rel Switch4Level64] + mov rbx, 0x3800000000 ; LINEAR_CODE64_SEL << 32 + or rax, rbx + push rax + + ; cs:ip for 32-bit mode (consumed by the retf right below) + lea rax, [rel Switch4Level32] + mov rbx, 0x1000000000 ; LINEAR_CODE_SEL << 32 + or rax, rbx + push rax + + ; enter 32-bit mode + retf + +Switch4Level64: + ; restore regs + pop rdx + pop rcx + pop rbx + pop rax + + ret + +BITS 32 + +Switch4Level32: + ; disable paging + mov eax, cr0 + btc eax, 31 ; clear PG (btc toggles the bit; PG is expected to be set on entry) + mov cr0, eax + + ; disable 5-level paging + mov eax, cr4 + btc eax, 12 ; clear la57 (btc toggles the bit; la57 is expected to be set on entry) + mov cr4, eax + + ; fixup cr3 (dereference 5th level: load the entry cr3 points at, keep address bits 12-31) + mov eax, cr3 + mov eax, [ eax ] + and eax, 0xfffff000 + mov cr3, eax + + ; enable paging + mov eax, cr0 + bts eax, 31 ; set PG + mov cr0, eax + + ; back to long mode + retf