OvmfPkg/PlatformInitLib: allow switching to 4-level paging

There are a number of mostly older guests such as RHEL-7 which do not
support 5-level paging.  This patch adds support for switching from
5-level paging mode back to 4-level paging mode.  This is done in PEI,
after inspecting the address space needed (installed memory and
reservations configured via fw_cfg).

By default, small guests (which need less than 1 TB of address space) will
use 4-level paging mode.  There is a fw_cfg override, though, so it is
possible to force one mode or the other like this:

qemu-system-x86_64 -fw_cfg name=opt/org.tianocode/PagingLevel,string=5

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
This commit is contained in:
Gerd Hoffmann 2024-06-26 13:26:08 +02:00 committed by mergify[bot]
parent f6092b5e2b
commit 2069a63a8e
3 changed files with 185 additions and 0 deletions

View File

@ -898,6 +898,111 @@ PlatformScanHostProvided64BitPciMmioEnd (
return EFI_NOT_FOUND;
}
/**
  Switch the CPU from 5-level paging mode to 4-level paging mode.

  Implemented in assembly (X64/Paging.nasm).  Takes no arguments and returns
  no value.  The only caller in this file invokes it after confirming that
  CR4.LA57 is set.
**/
VOID
EFIAPI
Switch4Level (
VOID
);
/**
  Configure x64 paging levels.

  The OVMF ResetVector code will enter long mode with 5-level paging if the
  following conditions are true:

   (1) OVMF has been built with PcdUse5LevelPageTable = TRUE, and
   (2) the CPU supports 5-level paging (aka la57), and
   (3) the CPU supports gigabyte pages, and
   (4) the VM is not running in SEV mode.

  Condition (4) is a temporary stopgap for BaseMemEncryptSevLib not supporting
  5-level paging yet.

  This function looks at the virtual machine configuration, then decides
  whether it will continue to use 5-level paging or downgrade to 4-level
  paging for better compatibility with older guest OS versions.

  There is a fw_cfg config option to explicitly request 4 or 5-level paging
  using 'qemu -fw_cfg name=opt/org.tianocode/PagingLevel,string=4|5'.  If the
  option is present the requested paging level will be used.  If the option
  holds any other value, or cannot be parsed, a warning is logged and the
  paging mode set up by the ResetVector (5-level) is left untouched.

  Should the option be absent the function checks the size of the address
  space needed, which is the RAM installed plus fw_cfg reservations.  The
  downgrade to 4-level paging will happen for small guests where the address
  space needed is lower than 1TB.

  This function will also log the paging level used and the reason for that.

  On non-X64 builds this function does nothing.

  @param[in,out] PlatformInfoHob  Platform info HOB.  Only FirstNonAddress is
                                  read here; nothing is written to it.
**/
STATIC
VOID
PlatformSetupPagingLevel (
  IN OUT EFI_HOB_PLATFORM_INFO  *PlatformInfoHob
  )
{
 #ifdef MDE_CPU_X64
  UINT32      PagingLevel;
  EFI_STATUS  Status;
  IA32_CR4    Cr4;

  Cr4.UintN = AsmReadCr4 ();
  if (!Cr4.Bits.LA57) {
    /* The OvmfPkg ResetVector has NOT turned on 5-level paging, log the reason. */
    if (!PcdGetBool (PcdUse5LevelPageTable)) {
      DEBUG ((DEBUG_INFO, "%a: using 4-level paging (PcdUse5LevelPageTable disabled)\n", __func__));
    } else {
      DEBUG ((DEBUG_INFO, "%a: using 4-level paging (la57 not supported by cpu)\n", __func__));
    }

    return;
  }

  //
  // NOTE(review): the FALSE argument presumably selects decimal (non-hex)
  // parsing -- confirm against the QemuFwCfgParseUint32 declaration.
  //
  Status = QemuFwCfgParseUint32 (
             "opt/org.tianocode/PagingLevel",
             FALSE,
             &PagingLevel
             );
  switch (Status) {
    case EFI_NOT_FOUND:
      //
      // No explicit request: pick a default based on the address space needed.
      //
      if (PlatformInfoHob->FirstNonAddress < (1ll << 40)) {
        //
        // If the highest address actually used is below 1TB switch back into
        // 4-level paging mode for better compatibility with older guests.
        //
        DEBUG ((DEBUG_INFO, "%a: using 4-level paging (default for small guest)\n", __func__));
        PagingLevel = 4;
      } else {
        DEBUG ((DEBUG_INFO, "%a: using 5-level paging (default for large guest)\n", __func__));
        PagingLevel = 5;
      }

      break;

    case EFI_SUCCESS:
      if ((PagingLevel != 4) && (PagingLevel != 5)) {
        //
        // PagingLevel is unsigned, so print it with %u; this is a
        // misconfiguration, so report it at warning level like the parse
        // failure below.  The current (5-level) mode is kept.
        //
        DEBUG ((DEBUG_WARN, "%a: invalid paging level in fw_cfg: %u\n", __func__, PagingLevel));
        return;
      }

      DEBUG ((DEBUG_INFO, "%a: using %d-level paging (fw_cfg override)\n", __func__, PagingLevel));
      break;

    default:
      //
      // fw_cfg item present but unreadable/unparsable: keep the current mode.
      //
      DEBUG ((DEBUG_WARN, "%a: QemuFwCfgParseUint32: %r\n", __func__, Status));
      return;
  }

  //
  // PagingLevel is either 4 or 5 here.  For 5 there is nothing to do: the
  // OvmfPkg ResetVector has already turned on 5-level paging.
  //
  if (PagingLevel == 4) {
    Switch4Level ();
  }

 #endif
}
/**
Initialize the PhysMemAddressWidth field in PlatformInfoHob based on guest RAM size.
**/
@ -946,6 +1051,8 @@ PlatformAddressWidthInitialization (
PlatformGetFirstNonAddress (PlatformInfoHob);
}
PlatformSetupPagingLevel (PlatformInfoHob);
PlatformAddressWidthFromCpuid (PlatformInfoHob, TRUE);
if (PlatformInfoHob->PhysMemAddressWidth != 0) {
// physical address width is known

View File

@ -32,6 +32,7 @@
[Sources.X64]
IntelTdx.c
X64/Paging.nasm
[Packages]
EmbeddedPkg/EmbeddedPkg.dec
@ -63,6 +64,7 @@
[Pcd]
gEfiMdePkgTokenSpaceGuid.PcdPciExpressBaseAddress
gEfiMdeModulePkgTokenSpaceGuid.PcdUse1GPageTable
gEfiMdeModulePkgTokenSpaceGuid.PcdUse5LevelPageTable
[FixedPcd]
gUefiOvmfPkgTokenSpaceGuid.PcdOvmfWorkAreaBase

View File

@ -0,0 +1,76 @@
;------------------------------------------------------------------------------
; @file
;
; Switch from 5-level paging mode to 4-level paging mode.
;
; This assumes everything (code, stack, page tables) is in 32-bit
; address space, which is true for the PEI phase even in X64 builds
; because low memory is used for early firmware setup.
;
; This also assumes the standard ResetVector GDT is active.
;
; SPDX-License-Identifier: BSD-2-Clause-Patent
;------------------------------------------------------------------------------
SECTION .text
BITS 64
global ASM_PFX(Switch4Level)
; Code segment selectors of the GDT this routine assumes to be active.
%define LINEAR_CODE_SEL     0x10        ; 32-bit code segment
%define LINEAR_CODE64_SEL   0x38        ; 64-bit code segment

; Control register bit positions.
%define CR0_PG_BIT          31          ; CR0.PG   - paging enable
%define CR4_LA57_BIT        12          ; CR4.LA57 - 5-level paging enable

;------------------------------------------------------------------------------
; VOID EFIAPI Switch4Level (VOID);
;
; Switch from 5-level paging to 4-level paging.
;
; In:     nothing
; Out:    nothing
; Clobb:  flags, CR0 (PG toggled off and on again), CR3, CR4.LA57.
;         rax/rbx/rcx/rdx are saved and restored.
; Stack:  6 qwords below the return address (4 saved registers plus two
;         far-return frames).
;
; Preconditions (not checked here): 5-level paging is currently active, and
; code, stack and page tables all live below 4GB.
;
; CR4.LA57 cannot be modified while long mode is active, so the routine
; takes a detour: far-return into 32-bit code, turn paging off, clear LA57,
; point CR3 at the next-level table, turn paging back on, and far-return
; into 64-bit code again.
;------------------------------------------------------------------------------
ASM_PFX(Switch4Level):
    ; save regs
    push    rax
    push    rbx
    push    rcx
    push    rdx

    ; Build two far-return frames on the stack.  Each frame is one qword:
    ; low dword = offset, high dword = selector.  The frame pushed last is
    ; consumed first.

    ; frame #2 (consumed by the retf in Switch4Level32): cs:ip for long mode
    lea     rax, [rel Switch4Level64]
    mov     rbx, (LINEAR_CODE64_SEL << 32)
    or      rax, rbx
    push    rax

    ; frame #1 (consumed by the retf below): cs:ip for 32-bit mode
    lea     rax, [rel Switch4Level32]
    mov     rbx, (LINEAR_CODE_SEL << 32)
    or      rax, rbx
    push    rax

    ; enter 32-bit mode
    retf

Switch4Level64:
    ; back in 64-bit code with 4-level paging active: restore regs
    pop     rdx
    pop     rcx
    pop     rbx
    pop     rax
    ret

BITS 32
Switch4Level32:
    ; disable paging (btr resets the bit regardless of its previous state)
    mov     eax, cr0
    btr     eax, CR0_PG_BIT
    mov     cr0, eax

    ; disable 5-level paging
    mov     eax, cr4
    btr     eax, CR4_LA57_BIT
    mov     cr4, eax

    ; fixup cr3: load entry 0 of the top-level (5th level) table that CR3
    ; points to and strip its low 12 attribute bits; the result is the
    ; address of the 4th-level table, which becomes the new root.
    ; Assumes the low 12 bits of CR3 are zero and the tables are below 4GB.
    mov     eax, cr3
    mov     eax, [eax]
    and     eax, 0xfffff000
    mov     cr3, eax

    ; enable paging
    mov     eax, cr0
    bts     eax, CR0_PG_BIT
    mov     cr0, eax

    ; back to long mode (pops frame #2 -> Switch4Level64)
    retf