audk/OvmfPkg/Bhyve/PlatformPei/MemDetect.c

628 lines
18 KiB
C
Raw Normal View History

/**@file
Memory Detection for Virtual Machines.
Copyright (c) 2020, Rebecca Cran <rebecca@bsdio.com>
Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
SPDX-License-Identifier: BSD-2-Clause-Patent
Module Name:
MemDetect.c
**/
//
// The package level header files this module uses
//
#include <IndustryStandard/E820.h>
#include <IndustryStandard/Q35MchIch9.h>
#include <PiPei.h>
//
// The Library classes this module consumes
//
#include <Library/BaseLib.h>
#include <Library/BaseMemoryLib.h>
#include <Library/DebugLib.h>
#include <Library/HobLib.h>
#include <Library/IoLib.h>
#include <Library/PcdLib.h>
#include <Library/PciLib.h>
#include <Library/PeimEntryPoint.h>
#include <Library/ResourcePublicationLib.h>
#include <Library/MtrrLib.h>
#include "Platform.h"
#include "Cmos.h"
UINT8 mPhysMemAddressWidth;
STATIC UINT32 mS3AcpiReservedMemoryBase;
STATIC UINT32 mS3AcpiReservedMemorySize;
STATIC UINT16 mQ35TsegMbytes;
BOOLEAN mQ35SmramAtDefaultSmbase = FALSE;
VOID
Q35TsegMbytesInitialization (
VOID
)
{
UINT16 ExtendedTsegMbytes;
RETURN_STATUS PcdStatus;
if (mHostBridgeDevId != INTEL_Q35_MCH_DEVICE_ID) {
DEBUG ((
DEBUG_ERROR,
"%a: no TSEG (SMRAM) on host bridge DID=0x%04x; "
"only DID=0x%04x (Q35) is supported\n",
__FUNCTION__,
mHostBridgeDevId,
INTEL_Q35_MCH_DEVICE_ID
));
ASSERT (FALSE);
CpuDeadLoop ();
}
//
// Check if QEMU offers an extended TSEG.
//
// This can be seen from writing MCH_EXT_TSEG_MB_QUERY to the MCH_EXT_TSEG_MB
// register, and reading back the register.
//
// On a QEMU machine type that does not offer an extended TSEG, the initial
// write overwrites whatever value a malicious guest OS may have placed in
// the (unimplemented) register, before entering S3 or rebooting.
// Subsequently, the read returns MCH_EXT_TSEG_MB_QUERY unchanged.
//
// On a QEMU machine type that offers an extended TSEG, the initial write
// triggers an update to the register. Subsequently, the value read back
// (which is guaranteed to differ from MCH_EXT_TSEG_MB_QUERY) tells us the
// number of megabytes.
//
PciWrite16 (DRAMC_REGISTER_Q35 (MCH_EXT_TSEG_MB), MCH_EXT_TSEG_MB_QUERY);
ExtendedTsegMbytes = PciRead16 (DRAMC_REGISTER_Q35 (MCH_EXT_TSEG_MB));
if (ExtendedTsegMbytes == MCH_EXT_TSEG_MB_QUERY) {
mQ35TsegMbytes = PcdGet16 (PcdQ35TsegMbytes);
return;
}
DEBUG ((
DEBUG_INFO,
"%a: QEMU offers an extended TSEG (%d MB)\n",
__FUNCTION__,
ExtendedTsegMbytes
));
PcdStatus = PcdSet16S (PcdQ35TsegMbytes, ExtendedTsegMbytes);
ASSERT_RETURN_ERROR (PcdStatus);
mQ35TsegMbytes = ExtendedTsegMbytes;
}
UINT32
GetSystemMemorySizeBelow4gb (
VOID
)
{
UINT8 Cmos0x34;
UINT8 Cmos0x35;
//
// CMOS 0x34/0x35 specifies the system memory above 16 MB.
// * CMOS(0x35) is the high byte
// * CMOS(0x34) is the low byte
// * The size is specified in 64kb chunks
// * Since this is memory above 16MB, the 16MB must be added
// into the calculation to get the total memory size.
//
Cmos0x34 = (UINT8) CmosRead8 (0x34);
Cmos0x35 = (UINT8) CmosRead8 (0x35);
return (UINT32) (((UINTN)((Cmos0x35 << 8) + Cmos0x34) << 16) + SIZE_16MB);
}
STATIC
UINT64
GetSystemMemorySizeAbove4gb (
)
{
UINT32 Size;
UINTN CmosIndex;
//
// CMOS 0x5b-0x5d specifies the system memory above 4GB MB.
// * CMOS(0x5d) is the most significant size byte
// * CMOS(0x5c) is the middle size byte
// * CMOS(0x5b) is the least significant size byte
// * The size is specified in 64kb chunks
//
Size = 0;
for (CmosIndex = 0x5d; CmosIndex >= 0x5b; CmosIndex--) {
Size = (UINT32) (Size << 8) + (UINT32) CmosRead8 (CmosIndex);
}
return LShiftU64 (Size, 16);
}
/**
Return the highest address that DXE could possibly use, plus one.
**/
STATIC
UINT64
GetFirstNonAddress (
VOID
)
{
UINT64 FirstNonAddress;
UINT64 Pci64Base, Pci64Size;
RETURN_STATUS PcdStatus;
FirstNonAddress = BASE_4GB + GetSystemMemorySizeAbove4gb ();
//
// If DXE is 32-bit, then we're done; PciBusDxe will degrade 64-bit MMIO
// resources to 32-bit anyway. See DegradeResource() in
// "PciResourceSupport.c".
//
#ifdef MDE_CPU_IA32
if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
return FirstNonAddress;
}
#endif
//
// Otherwise, in order to calculate the highest address plus one, we must
// consider the 64-bit PCI host aperture too. Fetch the default size.
//
Pci64Size = PcdGet64 (PcdPciMmio64Size);
if (Pci64Size == 0) {
if (mBootMode != BOOT_ON_S3_RESUME) {
DEBUG ((DEBUG_INFO, "%a: disabling 64-bit PCI host aperture\n",
__FUNCTION__));
PcdStatus = PcdSet64S (PcdPciMmio64Size, 0);
ASSERT_RETURN_ERROR (PcdStatus);
}
//
// There's nothing more to do; the amount of memory above 4GB fully
// determines the highest address plus one. The memory hotplug area (see
// below) plays no role for the firmware in this case.
//
return FirstNonAddress;
}
//
// SeaBIOS aligns both boundaries of the 64-bit PCI host aperture to 1GB, so
// that the host can map it with 1GB hugepages. Follow suit.
//
Pci64Base = ALIGN_VALUE (FirstNonAddress, (UINT64)SIZE_1GB);
Pci64Size = ALIGN_VALUE (Pci64Size, (UINT64)SIZE_1GB);
//
// The 64-bit PCI host aperture should also be "naturally" aligned. The
// alignment is determined by rounding the size of the aperture down to the
// next smaller or equal power of two. That is, align the aperture by the
// largest BAR size that can fit into it.
//
Pci64Base = ALIGN_VALUE (Pci64Base, GetPowerOfTwo64 (Pci64Size));
if (mBootMode != BOOT_ON_S3_RESUME) {
//
// The core PciHostBridgeDxe driver will automatically add this range to
// the GCD memory space map through our PciHostBridgeLib instance; here we
// only need to set the PCDs.
//
PcdStatus = PcdSet64S (PcdPciMmio64Base, Pci64Base);
ASSERT_RETURN_ERROR (PcdStatus);
PcdStatus = PcdSet64S (PcdPciMmio64Size, Pci64Size);
ASSERT_RETURN_ERROR (PcdStatus);
DEBUG ((DEBUG_INFO, "%a: Pci64Base=0x%Lx Pci64Size=0x%Lx\n",
__FUNCTION__, Pci64Base, Pci64Size));
}
//
// The useful address space ends with the 64-bit PCI host aperture.
//
FirstNonAddress = Pci64Base + Pci64Size;
return FirstNonAddress;
}
/**
Initialize the mPhysMemAddressWidth variable, based on guest RAM size.
**/
VOID
AddressWidthInitialization (
VOID
)
{
UINT64 FirstNonAddress;
//
// As guest-physical memory size grows, the permanent PEI RAM requirements
// are dominated by the identity-mapping page tables built by the DXE IPL.
// The DXL IPL keys off of the physical address bits advertized in the CPU
// HOB. To conserve memory, we calculate the minimum address width here.
//
FirstNonAddress = GetFirstNonAddress ();
mPhysMemAddressWidth = (UINT8)HighBitSet64 (FirstNonAddress);
//
// If FirstNonAddress is not an integral power of two, then we need an
// additional bit.
//
if ((FirstNonAddress & (FirstNonAddress - 1)) != 0) {
++mPhysMemAddressWidth;
}
//
// The minimum address width is 36 (covers up to and excluding 64 GB, which
// is the maximum for Ia32 + PAE). The theoretical architecture maximum for
// X64 long mode is 52 bits, but the DXE IPL clamps that down to 48 bits. We
// can simply assert that here, since 48 bits are good enough for 256 TB.
//
if (mPhysMemAddressWidth <= 36) {
mPhysMemAddressWidth = 36;
}
ASSERT (mPhysMemAddressWidth <= 48);
}
/**
Calculate the cap for the permanent PEI memory.
**/
STATIC
UINT32
GetPeiMemoryCap (
VOID
)
{
BOOLEAN Page1GSupport;
UINT32 RegEax;
UINT32 RegEdx;
UINT32 Pml4Entries;
UINT32 PdpEntries;
UINTN TotalPages;
//
// If DXE is 32-bit, then just return the traditional 64 MB cap.
//
#ifdef MDE_CPU_IA32
if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
return SIZE_64MB;
}
#endif
//
// Dependent on physical address width, PEI memory allocations can be
// dominated by the page tables built for 64-bit DXE. So we key the cap off
// of those. The code below is based on CreateIdentityMappingPageTables() in
// "MdeModulePkg/Core/DxeIplPeim/X64/VirtualMemory.c".
//
Page1GSupport = FALSE;
if (PcdGetBool (PcdUse1GPageTable)) {
AsmCpuid (0x80000000, &RegEax, NULL, NULL, NULL);
if (RegEax >= 0x80000001) {
AsmCpuid (0x80000001, NULL, NULL, NULL, &RegEdx);
if ((RegEdx & BIT26) != 0) {
Page1GSupport = TRUE;
}
}
}
if (mPhysMemAddressWidth <= 39) {
Pml4Entries = 1;
PdpEntries = 1 << (mPhysMemAddressWidth - 30);
ASSERT (PdpEntries <= 0x200);
} else {
Pml4Entries = 1 << (mPhysMemAddressWidth - 39);
ASSERT (Pml4Entries <= 0x200);
PdpEntries = 512;
}
TotalPages = Page1GSupport ? Pml4Entries + 1 :
(PdpEntries + 1) * Pml4Entries + 1;
ASSERT (TotalPages <= 0x40201);
//
// Add 64 MB for miscellaneous allocations. Note that for
// mPhysMemAddressWidth values close to 36, the cap will actually be
// dominated by this increment.
//
return (UINT32)(EFI_PAGES_TO_SIZE (TotalPages) + SIZE_64MB);
}
/**
Publish PEI core memory
@return EFI_SUCCESS The PEIM initialized successfully.
**/
EFI_STATUS
PublishPeiMemory (
VOID
)
{
EFI_STATUS Status;
EFI_PHYSICAL_ADDRESS MemoryBase;
UINT64 MemorySize;
UINT32 LowerMemorySize;
UINT32 PeiMemoryCap;
LowerMemorySize = GetSystemMemorySizeBelow4gb ();
if (FeaturePcdGet (PcdSmmSmramRequire)) {
//
// TSEG is chipped from the end of low RAM
//
LowerMemorySize -= mQ35TsegMbytes * SIZE_1MB;
}
//
// If S3 is supported, then the S3 permanent PEI memory is placed next,
// downwards. Its size is primarily dictated by CpuMpPei. The formula below
// is an approximation.
//
if (mS3Supported) {
mS3AcpiReservedMemorySize = SIZE_512KB +
mMaxCpuCount *
PcdGet32 (PcdCpuApStackSize);
mS3AcpiReservedMemoryBase = LowerMemorySize - mS3AcpiReservedMemorySize;
LowerMemorySize = mS3AcpiReservedMemoryBase;
}
if (mBootMode == BOOT_ON_S3_RESUME) {
MemoryBase = mS3AcpiReservedMemoryBase;
MemorySize = mS3AcpiReservedMemorySize;
} else {
PeiMemoryCap = GetPeiMemoryCap ();
DEBUG ((DEBUG_INFO, "%a: mPhysMemAddressWidth=%d PeiMemoryCap=%u KB\n",
__FUNCTION__, mPhysMemAddressWidth, PeiMemoryCap >> 10));
//
// Determine the range of memory to use during PEI
//
// Technically we could lay the permanent PEI RAM over SEC's temporary
// decompression and scratch buffer even if "secure S3" is needed, since
// their lifetimes don't overlap. However, PeiFvInitialization() will cover
// RAM up to PcdOvmfDecompressionScratchEnd with an EfiACPIMemoryNVS memory
// allocation HOB, and other allocations served from the permanent PEI RAM
// shouldn't overlap with that HOB.
//
MemoryBase = mS3Supported && FeaturePcdGet (PcdSmmSmramRequire) ?
PcdGet32 (PcdOvmfDecompressionScratchEnd) :
PcdGet32 (PcdOvmfDxeMemFvBase) + PcdGet32 (PcdOvmfDxeMemFvSize);
MemorySize = LowerMemorySize - MemoryBase;
if (MemorySize > PeiMemoryCap) {
MemoryBase = LowerMemorySize - PeiMemoryCap;
MemorySize = PeiMemoryCap;
}
}
//
// Publish this memory to the PEI Core
//
Status = PublishSystemMemory(MemoryBase, MemorySize);
ASSERT_EFI_ERROR (Status);
return Status;
}
/**
Peform Memory Detection for QEMU / KVM
**/
STATIC
VOID
QemuInitializeRam (
VOID
)
{
UINT64 LowerMemorySize;
UINT64 UpperMemorySize;
MTRR_SETTINGS MtrrSettings;
EFI_STATUS Status;
DEBUG ((DEBUG_INFO, "%a called\n", __FUNCTION__));
//
// Determine total memory size available
//
LowerMemorySize = GetSystemMemorySizeBelow4gb ();
UpperMemorySize = GetSystemMemorySizeAbove4gb ();
if (mBootMode == BOOT_ON_S3_RESUME) {
//
// Create the following memory HOB as an exception on the S3 boot path.
//
// Normally we'd create memory HOBs only on the normal boot path. However,
// CpuMpPei specifically needs such a low-memory HOB on the S3 path as
// well, for "borrowing" a subset of it temporarily, for the AP startup
// vector.
//
// CpuMpPei saves the original contents of the borrowed area in permanent
// PEI RAM, in a backup buffer allocated with the normal PEI services.
// CpuMpPei restores the original contents ("returns" the borrowed area) at
// End-of-PEI. End-of-PEI in turn is emitted by S3Resume2Pei before
// transferring control to the OS's wakeup vector in the FACS.
//
// We expect any other PEIMs that "borrow" memory similarly to CpuMpPei to
// restore the original contents. Furthermore, we expect all such PEIMs
// (CpuMpPei included) to claim the borrowed areas by producing memory
// allocation HOBs, and to honor preexistent memory allocation HOBs when
// looking for an area to borrow.
//
AddMemoryRangeHob (0, BASE_512KB + BASE_128KB);
} else {
//
// Create memory HOBs
//
AddMemoryRangeHob (0, BASE_512KB + BASE_128KB);
if (FeaturePcdGet (PcdSmmSmramRequire)) {
UINT32 TsegSize;
TsegSize = mQ35TsegMbytes * SIZE_1MB;
AddMemoryRangeHob (BASE_1MB, LowerMemorySize - TsegSize);
AddReservedMemoryBaseSizeHob (LowerMemorySize - TsegSize, TsegSize,
TRUE);
} else {
AddMemoryRangeHob (BASE_1MB, LowerMemorySize);
}
if (UpperMemorySize != 0) {
AddMemoryBaseSizeHob (BASE_4GB, UpperMemorySize);
}
}
//
// We'd like to keep the following ranges uncached:
// - [640 KB, 1 MB)
// - [LowerMemorySize, 4 GB)
//
// Everything else should be WB. Unfortunately, programming the inverse (ie.
// keeping the default UC, and configuring the complement set of the above as
// WB) is not reliable in general, because the end of the upper RAM can have
// practically any alignment, and we may not have enough variable MTRRs to
// cover it exactly.
//
if (IsMtrrSupported ()) {
MtrrGetAllMtrrs (&MtrrSettings);
//
// MTRRs disabled, fixed MTRRs disabled, default type is uncached
//
ASSERT ((MtrrSettings.MtrrDefType & BIT11) == 0);
ASSERT ((MtrrSettings.MtrrDefType & BIT10) == 0);
ASSERT ((MtrrSettings.MtrrDefType & 0xFF) == 0);
//
// flip default type to writeback
//
SetMem (&MtrrSettings.Fixed, sizeof MtrrSettings.Fixed, 0x06);
ZeroMem (&MtrrSettings.Variables, sizeof MtrrSettings.Variables);
MtrrSettings.MtrrDefType |= BIT11 | BIT10 | 6;
MtrrSetAllMtrrs (&MtrrSettings);
//
// Set memory range from 640KB to 1MB to uncacheable
//
Status = MtrrSetMemoryAttribute (BASE_512KB + BASE_128KB,
BASE_1MB - (BASE_512KB + BASE_128KB), CacheUncacheable);
ASSERT_EFI_ERROR (Status);
//
// Set memory range from the "top of lower RAM" (RAM below 4GB) to 4GB as
// uncacheable
//
Status = MtrrSetMemoryAttribute (LowerMemorySize,
SIZE_4GB - LowerMemorySize, CacheUncacheable);
ASSERT_EFI_ERROR (Status);
}
}
/**
Publish system RAM and reserve memory regions
**/
VOID
InitializeRamRegions (
VOID
)
{
QemuInitializeRam ();
if (mS3Supported && mBootMode != BOOT_ON_S3_RESUME) {
//
// This is the memory range that will be used for PEI on S3 resume
//
BuildMemoryAllocationHob (
mS3AcpiReservedMemoryBase,
mS3AcpiReservedMemorySize,
EfiACPIMemoryNVS
);
//
// Cover the initial RAM area used as stack and temporary PEI heap.
//
// This is reserved as ACPI NVS so it can be used on S3 resume.
//
BuildMemoryAllocationHob (
PcdGet32 (PcdOvmfSecPeiTempRamBase),
PcdGet32 (PcdOvmfSecPeiTempRamSize),
EfiACPIMemoryNVS
);
//
// SEC stores its table of GUIDed section handlers here.
//
BuildMemoryAllocationHob (
PcdGet64 (PcdGuidedExtractHandlerTableAddress),
PcdGet32 (PcdGuidedExtractHandlerTableSize),
EfiACPIMemoryNVS
);
#ifdef MDE_CPU_X64
//
// Reserve the initial page tables built by the reset vector code.
//
// Since this memory range will be used by the Reset Vector on S3
// resume, it must be reserved as ACPI NVS.
//
BuildMemoryAllocationHob (
(EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfSecPageTablesBase),
(UINT64)(UINTN) PcdGet32 (PcdOvmfSecPageTablesSize),
EfiACPIMemoryNVS
);
#endif
}
if (mBootMode != BOOT_ON_S3_RESUME) {
if (!FeaturePcdGet (PcdSmmSmramRequire)) {
//
// Reserve the lock box storage area
//
// Since this memory range will be used on S3 resume, it must be
// reserved as ACPI NVS.
//
// If S3 is unsupported, then various drivers might still write to the
// LockBox area. We ought to prevent DXE from serving allocation requests
// such that they would overlap the LockBox storage.
//
ZeroMem (
(VOID*)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
(UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize)
);
BuildMemoryAllocationHob (
(EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
(UINT64)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize),
mS3Supported ? EfiACPIMemoryNVS : EfiBootServicesData
);
}
if (FeaturePcdGet (PcdSmmSmramRequire)) {
UINT32 TsegSize;
//
// Make sure the TSEG area that we reported as a reserved memory resource
// cannot be used for reserved memory allocations.
//
TsegSize = mQ35TsegMbytes * SIZE_1MB;
BuildMemoryAllocationHob (
GetSystemMemorySizeBelow4gb() - TsegSize,
TsegSize,
EfiReservedMemoryType
);
}
}
}