2009-05-27 23:10:18 +02:00
|
|
|
/**@file
|
|
|
|
Memory Detection for Virtual Machines.
|
|
|
|
|
2016-04-21 08:31:55 +02:00
|
|
|
Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>
|
2010-04-28 14:43:04 +02:00
|
|
|
This program and the accompanying materials
|
2009-05-27 23:10:18 +02:00
|
|
|
are licensed and made available under the terms and conditions of the BSD License
|
|
|
|
which accompanies this distribution. The full text of the license may be found at
|
|
|
|
http://opensource.org/licenses/bsd-license.php
|
|
|
|
|
|
|
|
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
|
|
|
|
|
|
|
|
Module Name:
|
|
|
|
|
|
|
|
MemDetect.c
|
|
|
|
|
|
|
|
**/
|
|
|
|
|
|
|
|
//
|
|
|
|
// The package level header files this module uses
|
|
|
|
//
|
|
|
|
#include <PiPei.h>
|
|
|
|
|
|
|
|
//
|
|
|
|
// The Library classes this module consumes
|
|
|
|
//
|
2014-03-04 09:03:23 +01:00
|
|
|
#include <Library/BaseMemoryLib.h>
|
2009-05-27 23:10:18 +02:00
|
|
|
#include <Library/DebugLib.h>
|
|
|
|
#include <Library/HobLib.h>
|
|
|
|
#include <Library/IoLib.h>
|
2010-01-04 17:17:59 +01:00
|
|
|
#include <Library/PcdLib.h>
|
2009-05-27 23:10:18 +02:00
|
|
|
#include <Library/PeimEntryPoint.h>
|
|
|
|
#include <Library/ResourcePublicationLib.h>
|
2011-10-28 08:04:01 +02:00
|
|
|
#include <Library/MtrrLib.h>
|
OvmfPkg: PlatformPei: determine the 64-bit PCI host aperture for X64 DXE
The main observation about the 64-bit PCI host aperture is that it is the
highest part of the useful address space. It impacts the top of the GCD
memory space map, and, consequently, our maximum address width calculation
for the CPU HOB too.
Thus, modify the GetFirstNonAddress() function to consider the following
areas above the high RAM, while calculating the first non-address (i.e.,
the highest inclusive address, plus one):
- the memory hotplug area (optional, the size comes from QEMU),
- the 64-bit PCI host aperture (we set a default size).
While computing the first non-address, capture the base and the size of
the 64-bit PCI host aperture at once in PCDs, since they are natural parts
of the calculation.
(Similarly to how PcdPciMmio32* are not rewritten on the S3 resume path
(see the InitializePlatform() -> MemMapInitialization() condition), nor
are PcdPciMmio64*. Only the core PciHostBridgeDxe driver consumes them,
through our PciHostBridgeLib instance.)
Set 32GB as the default size for the aperture. Issue#59 mentions the
NVIDIA Tesla K80 as an assignable device. According to nvidia.com, these
cards may have 24GB of memory (probably 16GB + 8GB BARs).
As a strictly experimental feature, the user can specify the size of the
aperture (in MB) as well, with the QEMU option
-fw_cfg name=opt/ovmf/X-PciMmio64Mb,string=65536
The "X-" prefix follows the QEMU tradition (spelled "x-" there), meaning
that the property is experimental, unstable, and might go away any time.
Gerd has proposed heuristics for sizing the aperture automatically (based
on 1GB page support and PCPU address width), but such should be delayed to
a later patch (which may very well back out "X-PciMmio64Mb" then).
For "everyday" guests, the 32GB default for the aperture size shouldn't
impact the PEI memory demand (the size of the page tables that the DXE IPL
PEIM builds). Namely, we've never reported narrower than 36-bit addresses;
the DXE IPL PEIM has always built page tables for 64GB at least.
For the aperture to bump the address width above 36 bits, either the guest
must have quite a bit of memory itself (in which case the additional PEI
memory demand shouldn't matter), or the user must specify a large aperture
manually with "X-PciMmio64Mb" (and then he or she is also responsible for
giving enough RAM to the VM, to satisfy the PEI memory demand).
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Thomas Lamprecht <t.lamprecht@proxmox.com>
Ref: https://github.com/tianocore/edk2/issues/59
Ref: http://www.nvidia.com/object/tesla-servers.html
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2016-03-04 19:30:45 +01:00
|
|
|
#include <Library/QemuFwCfgLib.h>
|
2009-05-27 23:10:18 +02:00
|
|
|
|
|
|
|
#include "Platform.h"
|
|
|
|
#include "Cmos.h"
|
|
|
|
|
2015-06-26 18:09:39 +02:00
|
|
|
//
// Physical address width, in bits. Set by AddressWidthInitialization() and
// consumed when sizing the permanent PEI memory cap in this file.
//
UINT8 mPhysMemAddressWidth;
|
|
|
|
|
2014-02-01 22:22:43 +01:00
|
|
|
UINT32
|
2011-01-21 17:51:00 +01:00
|
|
|
GetSystemMemorySizeBelow4gb (
|
2014-02-01 22:22:43 +01:00
|
|
|
VOID
|
2009-05-27 23:10:18 +02:00
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT8 Cmos0x34;
|
|
|
|
UINT8 Cmos0x35;
|
|
|
|
|
|
|
|
//
|
|
|
|
// CMOS 0x34/0x35 specifies the system memory above 16 MB.
|
|
|
|
// * CMOS(0x35) is the high byte
|
|
|
|
// * CMOS(0x34) is the low byte
|
|
|
|
// * The size is specified in 64kb chunks
|
|
|
|
// * Since this is memory above 16MB, the 16MB must be added
|
|
|
|
// into the calculation to get the total memory size.
|
|
|
|
//
|
|
|
|
|
|
|
|
Cmos0x34 = (UINT8) CmosRead8 (0x34);
|
|
|
|
Cmos0x35 = (UINT8) CmosRead8 (0x35);
|
|
|
|
|
2014-09-25 04:29:10 +02:00
|
|
|
return (UINT32) (((UINTN)((Cmos0x35 << 8) + Cmos0x34) << 16) + SIZE_16MB);
|
2009-05-27 23:10:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-21 17:51:00 +01:00
|
|
|
STATIC
|
|
|
|
UINT64
|
|
|
|
GetSystemMemorySizeAbove4gb (
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT32 Size;
|
|
|
|
UINTN CmosIndex;
|
|
|
|
|
|
|
|
//
|
|
|
|
// CMOS 0x5b-0x5d specifies the system memory above 4GB MB.
|
|
|
|
// * CMOS(0x5d) is the most significant size byte
|
|
|
|
// * CMOS(0x5c) is the middle size byte
|
|
|
|
// * CMOS(0x5b) is the least significant size byte
|
|
|
|
// * The size is specified in 64kb chunks
|
|
|
|
//
|
|
|
|
|
|
|
|
Size = 0;
|
|
|
|
for (CmosIndex = 0x5d; CmosIndex >= 0x5b; CmosIndex--) {
|
|
|
|
Size = (UINT32) (Size << 8) + (UINT32) CmosRead8 (CmosIndex);
|
|
|
|
}
|
|
|
|
|
|
|
|
return LShiftU64 (Size, 16);
|
|
|
|
}
|
|
|
|
|
2015-06-26 18:09:39 +02:00
|
|
|
|
2016-03-04 17:23:35 +01:00
|
|
|
/**
|
|
|
|
Return the highest address that DXE could possibly use, plus one.
|
|
|
|
**/
|
|
|
|
STATIC
|
|
|
|
UINT64
|
|
|
|
GetFirstNonAddress (
|
|
|
|
VOID
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT64 FirstNonAddress;
|
OvmfPkg: PlatformPei: determine the 64-bit PCI host aperture for X64 DXE
The main observation about the 64-bit PCI host aperture is that it is the
highest part of the useful address space. It impacts the top of the GCD
memory space map, and, consequently, our maximum address width calculation
for the CPU HOB too.
Thus, modify the GetFirstNonAddress() function to consider the following
areas above the high RAM, while calculating the first non-address (i.e.,
the highest inclusive address, plus one):
- the memory hotplug area (optional, the size comes from QEMU),
- the 64-bit PCI host aperture (we set a default size).
While computing the first non-address, capture the base and the size of
the 64-bit PCI host aperture at once in PCDs, since they are natural parts
of the calculation.
(Similarly to how PcdPciMmio32* are not rewritten on the S3 resume path
(see the InitializePlatform() -> MemMapInitialization() condition), nor
are PcdPciMmio64*. Only the core PciHostBridgeDxe driver consumes them,
through our PciHostBridgeLib instance.)
Set 32GB as the default size for the aperture. Issue#59 mentions the
NVIDIA Tesla K80 as an assignable device. According to nvidia.com, these
cards may have 24GB of memory (probably 16GB + 8GB BARs).
As a strictly experimental feature, the user can specify the size of the
aperture (in MB) as well, with the QEMU option
-fw_cfg name=opt/ovmf/X-PciMmio64Mb,string=65536
The "X-" prefix follows the QEMU tradition (spelled "x-" there), meaning
that the property is experimental, unstable, and might go away any time.
Gerd has proposed heuristics for sizing the aperture automatically (based
on 1GB page support and PCPU address width), but such should be delayed to
a later patch (which may very well back out "X-PciMmio64Mb" then).
For "everyday" guests, the 32GB default for the aperture size shouldn't
impact the PEI memory demand (the size of the page tables that the DXE IPL
PEIM builds). Namely, we've never reported narrower than 36-bit addresses;
the DXE IPL PEIM has always built page tables for 64GB at least.
For the aperture to bump the address width above 36 bits, either the guest
must have quite a bit of memory itself (in which case the additional PEI
memory demand shouldn't matter), or the user must specify a large aperture
manually with "X-PciMmio64Mb" (and then he or she is also responsible for
giving enough RAM to the VM, to satisfy the PEI memory demand).
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Thomas Lamprecht <t.lamprecht@proxmox.com>
Ref: https://github.com/tianocore/edk2/issues/59
Ref: http://www.nvidia.com/object/tesla-servers.html
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2016-03-04 19:30:45 +01:00
|
|
|
UINT64 Pci64Base, Pci64Size;
|
|
|
|
CHAR8 MbString[7 + 1];
|
|
|
|
EFI_STATUS Status;
|
|
|
|
FIRMWARE_CONFIG_ITEM FwCfgItem;
|
|
|
|
UINTN FwCfgSize;
|
|
|
|
UINT64 HotPlugMemoryEnd;
|
2016-03-04 17:23:35 +01:00
|
|
|
|
|
|
|
FirstNonAddress = BASE_4GB + GetSystemMemorySizeAbove4gb ();
|
OvmfPkg: PlatformPei: determine the 64-bit PCI host aperture for X64 DXE
The main observation about the 64-bit PCI host aperture is that it is the
highest part of the useful address space. It impacts the top of the GCD
memory space map, and, consequently, our maximum address width calculation
for the CPU HOB too.
Thus, modify the GetFirstNonAddress() function to consider the following
areas above the high RAM, while calculating the first non-address (i.e.,
the highest inclusive address, plus one):
- the memory hotplug area (optional, the size comes from QEMU),
- the 64-bit PCI host aperture (we set a default size).
While computing the first non-address, capture the base and the size of
the 64-bit PCI host aperture at once in PCDs, since they are natural parts
of the calculation.
(Similarly to how PcdPciMmio32* are not rewritten on the S3 resume path
(see the InitializePlatform() -> MemMapInitialization() condition), nor
are PcdPciMmio64*. Only the core PciHostBridgeDxe driver consumes them,
through our PciHostBridgeLib instance.)
Set 32GB as the default size for the aperture. Issue#59 mentions the
NVIDIA Tesla K80 as an assignable device. According to nvidia.com, these
cards may have 24GB of memory (probably 16GB + 8GB BARs).
As a strictly experimental feature, the user can specify the size of the
aperture (in MB) as well, with the QEMU option
-fw_cfg name=opt/ovmf/X-PciMmio64Mb,string=65536
The "X-" prefix follows the QEMU tradition (spelled "x-" there), meaning
that the property is experimental, unstable, and might go away any time.
Gerd has proposed heuristics for sizing the aperture automatically (based
on 1GB page support and PCPU address width), but such should be delayed to
a later patch (which may very well back out "X-PciMmio64Mb" then).
For "everyday" guests, the 32GB default for the aperture size shouldn't
impact the PEI memory demand (the size of the page tables that the DXE IPL
PEIM builds). Namely, we've never reported narrower than 36-bit addresses;
the DXE IPL PEIM has always built page tables for 64GB at least.
For the aperture to bump the address width above 36 bits, either the guest
must have quite a bit of memory itself (in which case the additional PEI
memory demand shouldn't matter), or the user must specify a large aperture
manually with "X-PciMmio64Mb" (and then he or she is also responsible for
giving enough RAM to the VM, to satisfy the PEI memory demand).
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>
Cc: Thomas Lamprecht <t.lamprecht@proxmox.com>
Ref: https://github.com/tianocore/edk2/issues/59
Ref: http://www.nvidia.com/object/tesla-servers.html
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2016-03-04 19:30:45 +01:00
|
|
|
|
|
|
|
//
|
|
|
|
// If DXE is 32-bit, then we're done; PciBusDxe will degrade 64-bit MMIO
|
|
|
|
// resources to 32-bit anyway. See DegradeResource() in
|
|
|
|
// "PciResourceSupport.c".
|
|
|
|
//
|
|
|
|
#ifdef MDE_CPU_IA32
|
|
|
|
if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
|
|
|
|
return FirstNonAddress;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//
|
|
|
|
// Otherwise, in order to calculate the highest address plus one, we must
|
|
|
|
// consider the 64-bit PCI host aperture too. Fetch the default size.
|
|
|
|
//
|
|
|
|
Pci64Size = PcdGet64 (PcdPciMmio64Size);
|
|
|
|
|
|
|
|
//
|
|
|
|
// See if the user specified the number of megabytes for the 64-bit PCI host
|
|
|
|
// aperture. The number of non-NUL characters in MbString allows for
|
|
|
|
// 9,999,999 MB, which is approximately 10 TB.
|
|
|
|
//
|
|
|
|
// As signaled by the "X-" prefix, this knob is experimental, and might go
|
|
|
|
// away at any time.
|
|
|
|
//
|
|
|
|
Status = QemuFwCfgFindFile ("opt/ovmf/X-PciMmio64Mb", &FwCfgItem,
|
|
|
|
&FwCfgSize);
|
|
|
|
if (!EFI_ERROR (Status)) {
|
|
|
|
if (FwCfgSize >= sizeof MbString) {
|
|
|
|
DEBUG ((EFI_D_WARN,
|
|
|
|
"%a: ignoring malformed 64-bit PCI host aperture size from fw_cfg\n",
|
|
|
|
__FUNCTION__));
|
|
|
|
} else {
|
|
|
|
QemuFwCfgSelectItem (FwCfgItem);
|
|
|
|
QemuFwCfgReadBytes (FwCfgSize, MbString);
|
|
|
|
MbString[FwCfgSize] = '\0';
|
|
|
|
Pci64Size = LShiftU64 (AsciiStrDecimalToUint64 (MbString), 20);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Pci64Size == 0) {
|
|
|
|
if (mBootMode != BOOT_ON_S3_RESUME) {
|
|
|
|
DEBUG ((EFI_D_INFO, "%a: disabling 64-bit PCI host aperture\n",
|
|
|
|
__FUNCTION__));
|
|
|
|
PcdSet64 (PcdPciMmio64Size, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// There's nothing more to do; the amount of memory above 4GB fully
|
|
|
|
// determines the highest address plus one. The memory hotplug area (see
|
|
|
|
// below) plays no role for the firmware in this case.
|
|
|
|
//
|
|
|
|
return FirstNonAddress;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// The "etc/reserved-memory-end" fw_cfg file, when present, contains an
|
|
|
|
// absolute, exclusive end address for the memory hotplug area. This area
|
|
|
|
// starts right at the end of the memory above 4GB. The 64-bit PCI host
|
|
|
|
// aperture must be placed above it.
|
|
|
|
//
|
|
|
|
Status = QemuFwCfgFindFile ("etc/reserved-memory-end", &FwCfgItem,
|
|
|
|
&FwCfgSize);
|
|
|
|
if (!EFI_ERROR (Status) && FwCfgSize == sizeof HotPlugMemoryEnd) {
|
|
|
|
QemuFwCfgSelectItem (FwCfgItem);
|
|
|
|
QemuFwCfgReadBytes (FwCfgSize, &HotPlugMemoryEnd);
|
|
|
|
|
|
|
|
ASSERT (HotPlugMemoryEnd >= FirstNonAddress);
|
|
|
|
FirstNonAddress = HotPlugMemoryEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// SeaBIOS aligns both boundaries of the 64-bit PCI host aperture to 1GB, so
|
|
|
|
// that the host can map it with 1GB hugepages. Follow suit.
|
|
|
|
//
|
|
|
|
Pci64Base = ALIGN_VALUE (FirstNonAddress, (UINT64)SIZE_1GB);
|
|
|
|
Pci64Size = ALIGN_VALUE (Pci64Size, (UINT64)SIZE_1GB);
|
|
|
|
|
|
|
|
//
|
|
|
|
// The 64-bit PCI host aperture should also be "naturally" aligned. The
|
|
|
|
// alignment is determined by rounding the size of the aperture down to the
|
|
|
|
// next smaller or equal power of two. That is, align the aperture by the
|
|
|
|
// largest BAR size that can fit into it.
|
|
|
|
//
|
|
|
|
Pci64Base = ALIGN_VALUE (Pci64Base, GetPowerOfTwo64 (Pci64Size));
|
|
|
|
|
|
|
|
if (mBootMode != BOOT_ON_S3_RESUME) {
|
|
|
|
//
|
|
|
|
// The core PciHostBridgeDxe driver will automatically add this range to
|
|
|
|
// the GCD memory space map through our PciHostBridgeLib instance; here we
|
|
|
|
// only need to set the PCDs.
|
|
|
|
//
|
|
|
|
PcdSet64 (PcdPciMmio64Base, Pci64Base);
|
|
|
|
PcdSet64 (PcdPciMmio64Size, Pci64Size);
|
|
|
|
DEBUG ((EFI_D_INFO, "%a: Pci64Base=0x%Lx Pci64Size=0x%Lx\n",
|
|
|
|
__FUNCTION__, Pci64Base, Pci64Size));
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// The useful address space ends with the 64-bit PCI host aperture.
|
|
|
|
//
|
|
|
|
FirstNonAddress = Pci64Base + Pci64Size;
|
2016-03-04 17:23:35 +01:00
|
|
|
return FirstNonAddress;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-06-26 18:09:39 +02:00
|
|
|
/**
|
|
|
|
Initialize the mPhysMemAddressWidth variable, based on guest RAM size.
|
|
|
|
**/
|
|
|
|
VOID
|
|
|
|
AddressWidthInitialization (
|
|
|
|
VOID
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT64 FirstNonAddress;
|
|
|
|
|
|
|
|
//
|
|
|
|
// As guest-physical memory size grows, the permanent PEI RAM requirements
|
|
|
|
// are dominated by the identity-mapping page tables built by the DXE IPL.
|
|
|
|
// The DXL IPL keys off of the physical address bits advertized in the CPU
|
|
|
|
// HOB. To conserve memory, we calculate the minimum address width here.
|
|
|
|
//
|
2016-03-04 17:23:35 +01:00
|
|
|
FirstNonAddress = GetFirstNonAddress ();
|
2015-06-26 18:09:39 +02:00
|
|
|
mPhysMemAddressWidth = (UINT8)HighBitSet64 (FirstNonAddress);
|
|
|
|
|
|
|
|
//
|
|
|
|
// If FirstNonAddress is not an integral power of two, then we need an
|
|
|
|
// additional bit.
|
|
|
|
//
|
|
|
|
if ((FirstNonAddress & (FirstNonAddress - 1)) != 0) {
|
|
|
|
++mPhysMemAddressWidth;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// The minimum address width is 36 (covers up to and excluding 64 GB, which
|
|
|
|
// is the maximum for Ia32 + PAE). The theoretical architecture maximum for
|
|
|
|
// X64 long mode is 52 bits, but the DXE IPL clamps that down to 48 bits. We
|
|
|
|
// can simply assert that here, since 48 bits are good enough for 256 TB.
|
|
|
|
//
|
|
|
|
if (mPhysMemAddressWidth <= 36) {
|
|
|
|
mPhysMemAddressWidth = 36;
|
|
|
|
}
|
|
|
|
ASSERT (mPhysMemAddressWidth <= 48);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Calculate the cap for the permanent PEI memory.
|
|
|
|
**/
|
|
|
|
STATIC
|
|
|
|
UINT32
|
|
|
|
GetPeiMemoryCap (
|
|
|
|
VOID
|
|
|
|
)
|
|
|
|
{
|
|
|
|
BOOLEAN Page1GSupport;
|
|
|
|
UINT32 RegEax;
|
|
|
|
UINT32 RegEdx;
|
|
|
|
UINT32 Pml4Entries;
|
|
|
|
UINT32 PdpEntries;
|
|
|
|
UINTN TotalPages;
|
|
|
|
|
|
|
|
//
|
|
|
|
// If DXE is 32-bit, then just return the traditional 64 MB cap.
|
|
|
|
//
|
|
|
|
#ifdef MDE_CPU_IA32
|
|
|
|
if (!FeaturePcdGet (PcdDxeIplSwitchToLongMode)) {
|
|
|
|
return SIZE_64MB;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//
|
|
|
|
// Dependent on physical address width, PEI memory allocations can be
|
|
|
|
// dominated by the page tables built for 64-bit DXE. So we key the cap off
|
|
|
|
// of those. The code below is based on CreateIdentityMappingPageTables() in
|
|
|
|
// "MdeModulePkg/Core/DxeIplPeim/X64/VirtualMemory.c".
|
|
|
|
//
|
|
|
|
Page1GSupport = FALSE;
|
|
|
|
if (PcdGetBool (PcdUse1GPageTable)) {
|
|
|
|
AsmCpuid (0x80000000, &RegEax, NULL, NULL, NULL);
|
|
|
|
if (RegEax >= 0x80000001) {
|
|
|
|
AsmCpuid (0x80000001, NULL, NULL, NULL, &RegEdx);
|
|
|
|
if ((RegEdx & BIT26) != 0) {
|
|
|
|
Page1GSupport = TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mPhysMemAddressWidth <= 39) {
|
|
|
|
Pml4Entries = 1;
|
|
|
|
PdpEntries = 1 << (mPhysMemAddressWidth - 30);
|
|
|
|
ASSERT (PdpEntries <= 0x200);
|
|
|
|
} else {
|
|
|
|
Pml4Entries = 1 << (mPhysMemAddressWidth - 39);
|
|
|
|
ASSERT (Pml4Entries <= 0x200);
|
|
|
|
PdpEntries = 512;
|
|
|
|
}
|
|
|
|
|
|
|
|
TotalPages = Page1GSupport ? Pml4Entries + 1 :
|
|
|
|
(PdpEntries + 1) * Pml4Entries + 1;
|
|
|
|
ASSERT (TotalPages <= 0x40201);
|
|
|
|
|
|
|
|
//
|
|
|
|
// Add 64 MB for miscellaneous allocations. Note that for
|
|
|
|
// mPhysMemAddressWidth values close to 36, the cap will actually be
|
|
|
|
// dominated by this increment.
|
|
|
|
//
|
|
|
|
return (UINT32)(EFI_PAGES_TO_SIZE (TotalPages) + SIZE_64MB);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-12-08 02:36:07 +01:00
|
|
|
/**
|
|
|
|
Publish PEI core memory
|
|
|
|
|
|
|
|
@return EFI_SUCCESS The PEIM initialized successfully.
|
|
|
|
|
|
|
|
**/
|
|
|
|
EFI_STATUS
|
|
|
|
PublishPeiMemory (
|
|
|
|
VOID
|
|
|
|
)
|
|
|
|
{
|
|
|
|
EFI_STATUS Status;
|
|
|
|
EFI_PHYSICAL_ADDRESS MemoryBase;
|
|
|
|
UINT64 MemorySize;
|
|
|
|
UINT64 LowerMemorySize;
|
2015-06-26 18:09:39 +02:00
|
|
|
UINT32 PeiMemoryCap;
|
2013-12-08 02:36:07 +01:00
|
|
|
|
2014-03-04 09:02:16 +01:00
|
|
|
if (mBootMode == BOOT_ON_S3_RESUME) {
|
|
|
|
MemoryBase = PcdGet32 (PcdS3AcpiReservedMemoryBase);
|
|
|
|
MemorySize = PcdGet32 (PcdS3AcpiReservedMemorySize);
|
|
|
|
} else {
|
|
|
|
LowerMemorySize = GetSystemMemorySizeBelow4gb ();
|
OvmfPkg: PlatformPei: account for TSEG size with PcdSmmSmramRequire set
PlatformPei calls GetSystemMemorySizeBelow4gb() in three locations:
- PublishPeiMemory(): on normal boot, the permanent PEI RAM is installed
so that it ends with the RAM below 4GB,
- QemuInitializeRam(): on normal boot, memory resource descriptor HOBs are
created for the RAM below 4GB; plus MTRR attributes are set
(independently of S3 vs. normal boot)
- MemMapInitialization(): an MMIO resource descriptor HOB is created for
PCI resource allocation, on normal boot, starting at max(RAM below 4GB,
2GB).
The first two of these is adjusted for the configured TSEG size, if
PcdSmmSmramRequire is set:
- In PublishPeiMemory(), the permanent PEI RAM is kept under TSEG.
- In QemuInitializeRam(), we must keep the DXE out of TSEG.
One idea would be to simply trim the [1MB .. LowerMemorySize] memory
resource descriptor HOB, leaving a hole for TSEG in the memory space
map.
The SMM IPL will however want to massage the caching attributes of the
SMRAM range that it loads the SMM core into, with
gDS->SetMemorySpaceAttributes(), and that won't work on a hole. So,
instead of trimming this range, split the TSEG area off, and report it
as a cacheable reserved memory resource.
Finally, since reserved memory can be allocated too, pre-allocate TSEG
in InitializeRamRegions(), after QemuInitializeRam() returns. (Note that
this step alone does not suffice without the resource descriptor HOB
trickery: if we omit that, then the DXE IPL PEIM fails to load and start
the DXE core.)
- In MemMapInitialization(), the start of the PCI MMIO range is not
affected.
We choose the largest option (8MB) for the default TSEG size. Michael
Kinney pointed out that the SMBASE relocation in PiSmmCpuDxeSmm consumes
SMRAM proportionally to the number of CPUs. From the three options
available, he reported that 8MB was both necessary and sufficient for the
SMBASE relocation to succeed with 255 CPUs:
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3137
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3177
Cc: Michael Kinney <michael.d.kinney@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Michael Kinney <michael.d.kinney@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@19039 6f19259b-4bc3-4df7-8a09-765794883524
2015-11-30 19:41:33 +01:00
|
|
|
if (FeaturePcdGet (PcdSmmSmramRequire)) {
|
|
|
|
//
|
|
|
|
// TSEG is chipped from the end of low RAM
|
|
|
|
//
|
|
|
|
LowerMemorySize -= FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
|
|
|
|
}
|
2014-03-04 09:02:16 +01:00
|
|
|
|
2015-06-26 18:09:39 +02:00
|
|
|
PeiMemoryCap = GetPeiMemoryCap ();
|
|
|
|
DEBUG ((EFI_D_INFO, "%a: mPhysMemAddressWidth=%d PeiMemoryCap=%u KB\n",
|
|
|
|
__FUNCTION__, mPhysMemAddressWidth, PeiMemoryCap >> 10));
|
|
|
|
|
2014-03-04 09:02:16 +01:00
|
|
|
//
|
|
|
|
// Determine the range of memory to use during PEI
|
|
|
|
//
|
OvmfPkg: decompress FVs on S3 resume if SMM_REQUIRE is set
If OVMF was built with -D SMM_REQUIRE, that implies that the runtime OS is
not trusted and we should defend against it tampering with the firmware's
data.
One such datum is the PEI firmware volume (PEIFV). Normally PEIFV is
decompressed on the first boot by SEC, then the OS preserves it across S3
suspend-resume cycles; at S3 resume SEC just reuses the originally
decompressed PEIFV.
However, if we don't trust the OS, then SEC must decompress PEIFV from the
pristine flash every time, lest we execute OS-injected code or work with
OS-injected data.
Due to how FVMAIN_COMPACT is organized, we can't decompress just PEIFV;
the decompression brings DXEFV with itself, plus it uses a temporary
output buffer and a scratch buffer too, which even reach above the end of
the finally installed DXEFV. For this reason we must keep away a
non-malicious OS from DXEFV too, plus the memory up to
PcdOvmfDecomprScratchEnd.
The delay introduced by the LZMA decompression on S3 resume is negligible.
If -D SMM_REQUIRE is not specified, then PcdSmmSmramRequire remains FALSE
(from the DEC file), and then this patch has no effect (not counting some
changed debug messages).
If QEMU doesn't support S3 (or the user disabled it on the QEMU command
line), then this patch has no effect also.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@19037 6f19259b-4bc3-4df7-8a09-765794883524
2015-11-30 19:41:24 +01:00
|
|
|
// Technically we could lay the permanent PEI RAM over SEC's temporary
|
|
|
|
// decompression and scratch buffer even if "secure S3" is needed, since
|
|
|
|
// their lifetimes don't overlap. However, PeiFvInitialization() will cover
|
|
|
|
// RAM up to PcdOvmfDecompressionScratchEnd with an EfiACPIMemoryNVS memory
|
|
|
|
// allocation HOB, and other allocations served from the permanent PEI RAM
|
|
|
|
// shouldn't overlap with that HOB.
|
|
|
|
//
|
|
|
|
MemoryBase = mS3Supported && FeaturePcdGet (PcdSmmSmramRequire) ?
|
|
|
|
PcdGet32 (PcdOvmfDecompressionScratchEnd) :
|
|
|
|
PcdGet32 (PcdOvmfDxeMemFvBase) + PcdGet32 (PcdOvmfDxeMemFvSize);
|
2014-03-04 09:02:16 +01:00
|
|
|
MemorySize = LowerMemorySize - MemoryBase;
|
2015-06-26 18:09:39 +02:00
|
|
|
if (MemorySize > PeiMemoryCap) {
|
|
|
|
MemoryBase = LowerMemorySize - PeiMemoryCap;
|
|
|
|
MemorySize = PeiMemoryCap;
|
2014-03-04 09:02:16 +01:00
|
|
|
}
|
2013-12-08 02:36:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Publish this memory to the PEI Core
|
|
|
|
//
|
|
|
|
Status = PublishSystemMemory(MemoryBase, MemorySize);
|
|
|
|
ASSERT_EFI_ERROR (Status);
|
|
|
|
|
|
|
|
return Status;
|
|
|
|
}
|
|
|
|
|
2011-01-21 17:51:00 +01:00
|
|
|
|
2009-05-27 23:10:18 +02:00
|
|
|
/**
|
2014-02-01 22:22:48 +01:00
|
|
|
Perform Memory Detection for QEMU / KVM
|
2009-05-27 23:10:18 +02:00
|
|
|
|
|
|
|
**/
|
2014-02-01 22:22:48 +01:00
|
|
|
STATIC
|
|
|
|
VOID
|
|
|
|
QemuInitializeRam (
|
|
|
|
VOID
|
2009-05-27 23:10:18 +02:00
|
|
|
)
|
|
|
|
{
|
2011-01-21 17:51:00 +01:00
|
|
|
UINT64 LowerMemorySize;
|
|
|
|
UINT64 UpperMemorySize;
|
OvmfPkg: PlatformPei: invert MTRR setup in QemuInitializeRam()
At the moment we work with a UC default MTRR type, and set three memory
ranges to WB:
- [0, 640 KB),
- [1 MB, LowerMemorySize),
- [4 GB, 4 GB + UpperMemorySize).
Unfortunately, coverage for the third range can fail with a high
likelihood. If the alignment of the base (ie. 4 GB) and the alignment of
the size (UpperMemorySize) differ, then MtrrLib creates a series of
variable MTRR entries, with power-of-two sized MTRR masks. And, it's
really easy to run out of variable MTRR entries, dependent on the
alignment difference.
This is a problem because a Linux guest will loudly reject any high memory
that is not covered my MTRR.
So, let's follow the inverse pattern (loosely inspired by SeaBIOS):
- flip the MTRR default type to WB,
- set [0, 640 KB) to WB -- fixed MTRRs have precedence over the default
type and variable MTRRs, so we can't avoid this,
- set [640 KB, 1 MB) to UC -- implemented with fixed MTRRs,
- set [LowerMemorySize, 4 GB) to UC -- should succeed with variable MTRRs
more likely than the other scheme (due to less chaotic alignment
differences).
Effects of this patch can be observed by setting DEBUG_CACHE (0x00200000)
in PcdDebugPrintErrorLevel.
Cc: Maoming <maoming.maoming@huawei.com>
Cc: Huangpeng (Peter) <peter.huangpeng@huawei.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Maoming <maoming.maoming@huawei.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@17722 6f19259b-4bc3-4df7-8a09-765794883524
2015-06-26 18:09:52 +02:00
|
|
|
MTRR_SETTINGS MtrrSettings;
|
|
|
|
EFI_STATUS Status;
|
2009-05-27 23:10:18 +02:00
|
|
|
|
2014-02-01 22:22:48 +01:00
|
|
|
DEBUG ((EFI_D_INFO, "%a called\n", __FUNCTION__));
|
2009-05-27 23:10:18 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// Determine total memory size available
|
|
|
|
//
|
2011-01-21 17:51:00 +01:00
|
|
|
LowerMemorySize = GetSystemMemorySizeBelow4gb ();
|
|
|
|
UpperMemorySize = GetSystemMemorySizeAbove4gb ();
|
2009-05-27 23:10:18 +02:00
|
|
|
|
2014-03-04 09:02:30 +01:00
|
|
|
if (mBootMode != BOOT_ON_S3_RESUME) {
|
|
|
|
//
|
|
|
|
// Create memory HOBs
|
|
|
|
//
|
|
|
|
AddMemoryRangeHob (0, BASE_512KB + BASE_128KB);
|
OvmfPkg: PlatformPei: account for TSEG size with PcdSmmSmramRequire set
PlatformPei calls GetSystemMemorySizeBelow4gb() in three locations:
- PublishPeiMemory(): on normal boot, the permanent PEI RAM is installed
so that it ends with the RAM below 4GB,
- QemuInitializeRam(): on normal boot, memory resource descriptor HOBs are
created for the RAM below 4GB; plus MTRR attributes are set
(independently of S3 vs. normal boot)
- MemMapInitialization(): an MMIO resource descriptor HOB is created for
PCI resource allocation, on normal boot, starting at max(RAM below 4GB,
2GB).
The first two of these are adjusted for the configured TSEG size, if
PcdSmmSmramRequire is set:
- In PublishPeiMemory(), the permanent PEI RAM is kept under TSEG.
- In QemuInitializeRam(), we must keep the DXE out of TSEG.
One idea would be to simply trim the [1MB .. LowerMemorySize] memory
resource descriptor HOB, leaving a hole for TSEG in the memory space
map.
The SMM IPL will however want to massage the caching attributes of the
SMRAM range that it loads the SMM core into, with
gDS->SetMemorySpaceAttributes(), and that won't work on a hole. So,
instead of trimming this range, split the TSEG area off, and report it
as a cacheable reserved memory resource.
Finally, since reserved memory can be allocated too, pre-allocate TSEG
in InitializeRamRegions(), after QemuInitializeRam() returns. (Note that
this step alone does not suffice without the resource descriptor HOB
trickery: if we omit that, then the DXE IPL PEIM fails to load and start
the DXE core.)
- In MemMapInitialization(), the start of the PCI MMIO range is not
affected.
We choose the largest option (8MB) for the default TSEG size. Michael
Kinney pointed out that the SMBASE relocation in PiSmmCpuDxeSmm consumes
SMRAM proportionally to the number of CPUs. From the three options
available, he reported that 8MB was both necessary and sufficient for the
SMBASE relocation to succeed with 255 CPUs:
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3137
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3177
Cc: Michael Kinney <michael.d.kinney@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Michael Kinney <michael.d.kinney@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@19039 6f19259b-4bc3-4df7-8a09-765794883524
2015-11-30 19:41:33 +01:00
|
|
|
|
|
|
|
if (FeaturePcdGet (PcdSmmSmramRequire)) {
|
|
|
|
UINT32 TsegSize;
|
|
|
|
|
|
|
|
TsegSize = FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
|
|
|
|
AddMemoryRangeHob (BASE_1MB, LowerMemorySize - TsegSize);
|
|
|
|
AddReservedMemoryBaseSizeHob (LowerMemorySize - TsegSize, TsegSize,
|
|
|
|
TRUE);
|
|
|
|
} else {
|
|
|
|
AddMemoryRangeHob (BASE_1MB, LowerMemorySize);
|
|
|
|
}
|
|
|
|
|
2015-06-26 18:09:48 +02:00
|
|
|
if (UpperMemorySize != 0) {
|
2016-04-21 08:31:55 +02:00
|
|
|
AddMemoryBaseSizeHob (BASE_4GB, UpperMemorySize);
|
2015-06-26 18:09:48 +02:00
|
|
|
}
|
2014-03-04 09:02:30 +01:00
|
|
|
}
|
2009-05-27 23:10:18 +02:00
|
|
|
|
OvmfPkg: PlatformPei: invert MTRR setup in QemuInitializeRam()
At the moment we work with a UC default MTRR type, and set three memory
ranges to WB:
- [0, 640 KB),
- [1 MB, LowerMemorySize),
- [4 GB, 4 GB + UpperMemorySize).
Unfortunately, coverage for the third range can fail with a high
likelihood. If the alignment of the base (ie. 4 GB) and the alignment of
the size (UpperMemorySize) differ, then MtrrLib creates a series of
variable MTRR entries, with power-of-two sized MTRR masks. And, it's
really easy to run out of variable MTRR entries, dependent on the
alignment difference.
This is a problem because a Linux guest will loudly reject any high memory
that is not covered by MTRR.
So, let's follow the inverse pattern (loosely inspired by SeaBIOS):
- flip the MTRR default type to WB,
- set [0, 640 KB) to WB -- fixed MTRRs have precedence over the default
type and variable MTRRs, so we can't avoid this,
- set [640 KB, 1 MB) to UC -- implemented with fixed MTRRs,
- set [LowerMemorySize, 4 GB) to UC -- should succeed with variable MTRRs
more likely than the other scheme (due to less chaotic alignment
differences).
Effects of this patch can be observed by setting DEBUG_CACHE (0x00200000)
in PcdDebugPrintErrorLevel.
Cc: Maoming <maoming.maoming@huawei.com>
Cc: Huangpeng (Peter) <peter.huangpeng@huawei.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Maoming <maoming.maoming@huawei.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@17722 6f19259b-4bc3-4df7-8a09-765794883524
2015-06-26 18:09:52 +02:00
|
|
|
//
|
|
|
|
// We'd like to keep the following ranges uncached:
|
|
|
|
// - [640 KB, 1 MB)
|
|
|
|
// - [LowerMemorySize, 4 GB)
|
|
|
|
//
|
|
|
|
// Everything else should be WB. Unfortunately, programming the inverse (ie.
|
|
|
|
// keeping the default UC, and configuring the complement set of the above as
|
|
|
|
// WB) is not reliable in general, because the end of the upper RAM can have
|
|
|
|
// practically any alignment, and we may not have enough variable MTRRs to
|
|
|
|
// cover it exactly.
|
|
|
|
//
|
|
|
|
if (IsMtrrSupported ()) {
|
|
|
|
MtrrGetAllMtrrs (&MtrrSettings);
|
|
|
|
|
|
|
|
//
|
|
|
|
// MTRRs disabled, fixed MTRRs disabled, default type is uncached
|
|
|
|
//
|
|
|
|
ASSERT ((MtrrSettings.MtrrDefType & BIT11) == 0);
|
|
|
|
ASSERT ((MtrrSettings.MtrrDefType & BIT10) == 0);
|
|
|
|
ASSERT ((MtrrSettings.MtrrDefType & 0xFF) == 0);
|
|
|
|
|
|
|
|
//
|
|
|
|
// flip default type to writeback
|
|
|
|
//
|
|
|
|
SetMem (&MtrrSettings.Fixed, sizeof MtrrSettings.Fixed, 0x06);
|
|
|
|
ZeroMem (&MtrrSettings.Variables, sizeof MtrrSettings.Variables);
|
|
|
|
MtrrSettings.MtrrDefType |= BIT11 | BIT10 | 6;
|
|
|
|
MtrrSetAllMtrrs (&MtrrSettings);
|
2011-10-28 08:04:01 +02:00
|
|
|
|
OvmfPkg: PlatformPei: invert MTRR setup in QemuInitializeRam()
At the moment we work with a UC default MTRR type, and set three memory
ranges to WB:
- [0, 640 KB),
- [1 MB, LowerMemorySize),
- [4 GB, 4 GB + UpperMemorySize).
Unfortunately, coverage for the third range can fail with a high
likelihood. If the alignment of the base (ie. 4 GB) and the alignment of
the size (UpperMemorySize) differ, then MtrrLib creates a series of
variable MTRR entries, with power-of-two sized MTRR masks. And, it's
really easy to run out of variable MTRR entries, dependent on the
alignment difference.
This is a problem because a Linux guest will loudly reject any high memory
that is not covered by MTRR.
So, let's follow the inverse pattern (loosely inspired by SeaBIOS):
- flip the MTRR default type to WB,
- set [0, 640 KB) to WB -- fixed MTRRs have precedence over the default
type and variable MTRRs, so we can't avoid this,
- set [640 KB, 1 MB) to UC -- implemented with fixed MTRRs,
- set [LowerMemorySize, 4 GB) to UC -- should succeed with variable MTRRs
more likely than the other scheme (due to less chaotic alignment
differences).
Effects of this patch can be observed by setting DEBUG_CACHE (0x00200000)
in PcdDebugPrintErrorLevel.
Cc: Maoming <maoming.maoming@huawei.com>
Cc: Huangpeng (Peter) <peter.huangpeng@huawei.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Maoming <maoming.maoming@huawei.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@17722 6f19259b-4bc3-4df7-8a09-765794883524
2015-06-26 18:09:52 +02:00
|
|
|
//
|
|
|
|
// Set memory range from 640KB to 1MB to uncacheable
|
|
|
|
//
|
|
|
|
Status = MtrrSetMemoryAttribute (BASE_512KB + BASE_128KB,
|
|
|
|
BASE_1MB - (BASE_512KB + BASE_128KB), CacheUncacheable);
|
|
|
|
ASSERT_EFI_ERROR (Status);
|
2011-10-28 08:04:01 +02:00
|
|
|
|
OvmfPkg: PlatformPei: invert MTRR setup in QemuInitializeRam()
At the moment we work with a UC default MTRR type, and set three memory
ranges to WB:
- [0, 640 KB),
- [1 MB, LowerMemorySize),
- [4 GB, 4 GB + UpperMemorySize).
Unfortunately, coverage for the third range can fail with a high
likelihood. If the alignment of the base (ie. 4 GB) and the alignment of
the size (UpperMemorySize) differ, then MtrrLib creates a series of
variable MTRR entries, with power-of-two sized MTRR masks. And, it's
really easy to run out of variable MTRR entries, dependent on the
alignment difference.
This is a problem because a Linux guest will loudly reject any high memory
that is not covered by MTRR.
So, let's follow the inverse pattern (loosely inspired by SeaBIOS):
- flip the MTRR default type to WB,
- set [0, 640 KB) to WB -- fixed MTRRs have precedence over the default
type and variable MTRRs, so we can't avoid this,
- set [640 KB, 1 MB) to UC -- implemented with fixed MTRRs,
- set [LowerMemorySize, 4 GB) to UC -- should succeed with variable MTRRs
more likely than the other scheme (due to less chaotic alignment
differences).
Effects of this patch can be observed by setting DEBUG_CACHE (0x00200000)
in PcdDebugPrintErrorLevel.
Cc: Maoming <maoming.maoming@huawei.com>
Cc: Huangpeng (Peter) <peter.huangpeng@huawei.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Maoming <maoming.maoming@huawei.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@17722 6f19259b-4bc3-4df7-8a09-765794883524
2015-06-26 18:09:52 +02:00
|
|
|
//
|
|
|
|
// Set memory range from the "top of lower RAM" (RAM below 4GB) to 4GB as
|
|
|
|
// uncacheable
|
|
|
|
//
|
|
|
|
Status = MtrrSetMemoryAttribute (LowerMemorySize,
|
|
|
|
SIZE_4GB - LowerMemorySize, CacheUncacheable);
|
|
|
|
ASSERT_EFI_ERROR (Status);
|
2011-01-21 17:51:00 +01:00
|
|
|
}
|
2009-05-27 23:10:18 +02:00
|
|
|
}
|
|
|
|
|
2014-02-01 22:22:48 +01:00
|
|
|
/**
|
|
|
|
Publish system RAM and reserve memory regions
|
|
|
|
|
|
|
|
**/
|
|
|
|
VOID
|
|
|
|
InitializeRamRegions (
|
|
|
|
VOID
|
|
|
|
)
|
|
|
|
{
|
2014-02-01 22:22:54 +01:00
|
|
|
if (!mXen) {
|
|
|
|
QemuInitializeRam ();
|
|
|
|
} else {
|
|
|
|
XenPublishRamRegions ();
|
|
|
|
}
|
2014-03-04 09:02:16 +01:00
|
|
|
|
|
|
|
if (mS3Supported && mBootMode != BOOT_ON_S3_RESUME) {
|
|
|
|
//
|
|
|
|
// This is the memory range that will be used for PEI on S3 resume
|
|
|
|
//
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
(EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdS3AcpiReservedMemoryBase),
|
|
|
|
(UINT64)(UINTN) PcdGet32 (PcdS3AcpiReservedMemorySize),
|
|
|
|
EfiACPIMemoryNVS
|
|
|
|
);
|
2014-03-04 09:02:45 +01:00
|
|
|
|
|
|
|
//
|
|
|
|
// Cover the initial RAM area used as stack and temporary PEI heap.
|
|
|
|
//
|
|
|
|
// This is reserved as ACPI NVS so it can be used on S3 resume.
|
|
|
|
//
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
PcdGet32 (PcdOvmfSecPeiTempRamBase),
|
|
|
|
PcdGet32 (PcdOvmfSecPeiTempRamSize),
|
|
|
|
EfiACPIMemoryNVS
|
|
|
|
);
|
2014-03-04 09:02:52 +01:00
|
|
|
|
OvmfPkg: PlatformPei: protect SEC's GUIDed section handler table thru S3
OVMF's SecMain is unique in the sense that it links against the following
two libraries *in combination*:
- IntelFrameworkModulePkg/Library/LzmaCustomDecompressLib/
LzmaCustomDecompressLib.inf
- MdePkg/Library/BaseExtractGuidedSectionLib/
BaseExtractGuidedSectionLib.inf
The ExtractGuidedSectionLib library class allows decompressor modules to
register themselves (keyed by GUID) with it, and it allows clients to
decompress file sections with a registered decompressor module that
matches the section's GUID.
BaseExtractGuidedSectionLib is a library instance (of type BASE) for this
library class. It has no constructor function.
LzmaCustomDecompressLib is a compatible decompressor module (of type
BASE). Its section type GUID is
gLzmaCustomDecompressGuid == EE4E5898-3914-4259-9D6E-DC7BD79403CF
When OVMF's SecMain module starts, the LzmaCustomDecompressLib constructor
function is executed, which registers its LZMA decompressor with the above
GUID, by calling into BaseExtractGuidedSectionLib:
LzmaDecompressLibConstructor() [GuidedSectionExtraction.c]
ExtractGuidedSectionRegisterHandlers() [BaseExtractGuidedSectionLib.c]
GetExtractGuidedSectionHandlerInfo()
PcdGet64 (PcdGuidedExtractHandlerTableAddress) -- NOTE THIS
Later, during a normal (non-S3) boot, SecMain utilizes this decompressor
to get information about, and to decompress, sections of the OVMF firmware
image:
SecCoreStartupWithStack() [OvmfPkg/Sec/SecMain.c]
SecStartupPhase2()
FindAndReportEntryPoints()
FindPeiCoreImageBase()
DecompressMemFvs()
ExtractGuidedSectionGetInfo() [BaseExtractGuidedSectionLib.c]
ExtractGuidedSectionDecode() [BaseExtractGuidedSectionLib.c]
Notably, only the extraction depends on full-config-boot; the registration
of LzmaCustomDecompressLib occurs unconditionally in the SecMain EFI
binary, triggered by the library constructor function.
This is where the bug happens. BaseExtractGuidedSectionLib maintains the
table of GUIDed decompressors (section handlers) at a fixed memory
location; selected by PcdGuidedExtractHandlerTableAddress (declared in
MdePkg.dec). The default value of this PCD is 0x1000000 (16 MB).
This causes SecMain to corrupt guest OS memory during S3, leading to
random crashes. Compare the following two memory dumps, the first taken
right before suspending, the second taken right after resuming a RHEL-7
guest:
crash> rd -8 -p 1000000 0x50
1000000: c0 00 08 00 02 00 00 00 00 00 00 00 00 00 00 00 ................
1000010: d0 33 0c 00 00 c9 ff ff c0 10 00 01 00 88 ff ff .3..............
1000020: 0a 6d 57 32 0f 00 00 00 38 00 00 01 00 88 ff ff .mW2....8.......
1000030: 00 00 00 00 00 00 00 00 73 69 67 6e 61 6c 6d 6f ........signalmo
1000040: 64 75 6c 65 2e 73 6f 00 00 00 00 00 00 00 00 00 dule.so.........
vs.
crash> rd -8 -p 1000000 0x50
1000000: 45 47 53 49 01 00 00 00 20 00 00 01 00 00 00 00 EGSI.... .......
1000010: 20 01 00 01 00 00 00 00 a0 01 00 01 00 00 00 00 ...............
1000020: 98 58 4e ee 14 39 59 42 9d 6e dc 7b d7 94 03 cf .XN..9YB.n.{....
1000030: 00 00 00 00 00 00 00 00 73 69 67 6e 61 6c 6d 6f ........signalmo
1000040: 64 75 6c 65 2e 73 6f 00 00 00 00 00 00 00 00 00 dule.so.........
The "EGSI" signature corresponds to EXTRACT_HANDLER_INFO_SIGNATURE
declared in
MdePkg/Library/BaseExtractGuidedSectionLib/BaseExtractGuidedSectionLib.c.
Additionally, the gLzmaCustomDecompressGuid (quoted above) is visible at
guest-phys offset 0x1000020.
Fix the problem as follows:
- Carve out 4KB from the 36KB gap that we currently have between
PcdOvmfLockBoxStorageBase + PcdOvmfLockBoxStorageSize == 8220 KB
and
PcdOvmfSecPeiTempRamBase == 8256 KB.
- Point PcdGuidedExtractHandlerTableAddress to 8220 KB (0x00807000).
- Cover the area with an EfiACPIMemoryNVS type memalloc HOB, if S3 is
supported and we're not currently resuming.
The 4KB size that we pick is an upper estimate for
BaseExtractGuidedSectionLib's internal storage size. The latter is
calculated as follows (see GetExtractGuidedSectionHandlerInfo()):
sizeof(EXTRACT_GUIDED_SECTION_HANDLER_INFO) + // 32
PcdMaximumGuidedExtractHandler * (
sizeof(GUID) + // 16
sizeof(EXTRACT_GUIDED_SECTION_DECODE_HANDLER) + // 8
sizeof(EXTRACT_GUIDED_SECTION_GET_INFO_HANDLER) // 8
)
OVMF sets PcdMaximumGuidedExtractHandler to 16 decimal (which is the
MdePkg default too), yielding 32 + 16 * (16 + 8 + 8) == 544 bytes.
Regarding the lifecycle of the new area:
(a) when and how it is initialized after first boot of the VM
The library linked into SecMain finds that the area lacks the signature.
It initializes the signature, plus the rest of the structure. This is
independent of S3 support.
Consumption of the area is also limited to SEC (but consumption does
depend on full-config-boot).
(b) how it is protected from memory allocations during DXE
It is not, in the general case; and we don't need to. Nothing else links
against BaseExtractGuidedSectionLib; it's OK if DXE overwrites the area.
(c) how it is protected from the OS
When S3 is enabled, we cover it with AcpiNVS in InitializeRamRegions().
When S3 is not supported, the range is not protected.
(d) how it is accessed on the S3 resume path
Examined by the library linked into SecMain. Registrations update the
table in-place (based on GUID matches).
(e) how it is accessed on the warm reset path
If S3 is enabled, then the OS won't damage the table (due to (c)), hence
see (d).
If S3 is unsupported, then the OS may or may not overwrite the
signature. (It likely will.) This is identical to the pre-patch status.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@15433 6f19259b-4bc3-4df7-8a09-765794883524
2014-04-05 23:26:09 +02:00
|
|
|
//
|
|
|
|
// SEC stores its table of GUIDed section handlers here.
|
|
|
|
//
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
PcdGet64 (PcdGuidedExtractHandlerTableAddress),
|
|
|
|
PcdGet32 (PcdGuidedExtractHandlerTableSize),
|
|
|
|
EfiACPIMemoryNVS
|
|
|
|
);
|
|
|
|
|
2014-03-04 09:02:52 +01:00
|
|
|
#ifdef MDE_CPU_X64
|
|
|
|
//
|
|
|
|
// Reserve the initial page tables built by the reset vector code.
|
|
|
|
//
|
|
|
|
// Since this memory range will be used by the Reset Vector on S3
|
|
|
|
// resume, it must be reserved as ACPI NVS.
|
|
|
|
//
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
(EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfSecPageTablesBase),
|
|
|
|
(UINT64)(UINTN) PcdGet32 (PcdOvmfSecPageTablesSize),
|
|
|
|
EfiACPIMemoryNVS
|
|
|
|
);
|
|
|
|
#endif
|
OvmfPkg: PlatformPei: lifecycle fixes for the LockBox area
If (mBootMode == BOOT_ON_S3_RESUME) -- that is, we are resuming --, then
the patch has no observable effect.
If (mBootMode != BOOT_ON_S3_RESUME && mS3Supported) -- that is, we are
booting or rebooting, and S3 is supported), then the patch has no
observable effect either.
If (mBootMode != BOOT_ON_S3_RESUME && !mS3Supported) -- that is, we are
booting or rebooting, and S3 is unsupported), then the patch effects the
following two fixes:
- The LockBox storage is reserved from DXE (but not the OS). Drivers in
DXE may save data in the LockBox regardless of S3 support, potentially
corrupting any overlapping allocations. Make sure there's no overlap.
- The LockBox storage is cleared. A LockBox inherited across a non-resume
reboot, populated with well-known GUIDs, breaks drivers that want to
save entries with those GUIDs.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Matt Fleming <matt.fleming@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@15418 6f19259b-4bc3-4df7-8a09-765794883524
2014-03-31 22:35:50 +02:00
|
|
|
}
|
2014-03-04 09:03:23 +01:00
|
|
|
|
OvmfPkg: PlatformPei: lifecycle fixes for the LockBox area
If (mBootMode == BOOT_ON_S3_RESUME) -- that is, we are resuming --, then
the patch has no observable effect.
If (mBootMode != BOOT_ON_S3_RESUME && mS3Supported) -- that is, we are
booting or rebooting, and S3 is supported), then the patch has no
observable effect either.
If (mBootMode != BOOT_ON_S3_RESUME && !mS3Supported) -- that is, we are
booting or rebooting, and S3 is unsupported), then the patch effects the
following two fixes:
- The LockBox storage is reserved from DXE (but not the OS). Drivers in
DXE may save data in the LockBox regardless of S3 support, potentially
corrupting any overlapping allocations. Make sure there's no overlap.
- The LockBox storage is cleared. A LockBox inherited across a non-resume
reboot, populated with well-known GUIDs, breaks drivers that want to
save entries with those GUIDs.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Matt Fleming <matt.fleming@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@15418 6f19259b-4bc3-4df7-8a09-765794883524
2014-03-31 22:35:50 +02:00
|
|
|
if (mBootMode != BOOT_ON_S3_RESUME) {
|
2015-11-30 19:42:10 +01:00
|
|
|
if (!FeaturePcdGet (PcdSmmSmramRequire)) {
|
|
|
|
//
|
|
|
|
// Reserve the lock box storage area
|
|
|
|
//
|
|
|
|
// Since this memory range will be used on S3 resume, it must be
|
|
|
|
// reserved as ACPI NVS.
|
|
|
|
//
|
|
|
|
// If S3 is unsupported, then various drivers might still write to the
|
|
|
|
// LockBox area. We ought to prevent DXE from serving allocation requests
|
|
|
|
// such that they would overlap the LockBox storage.
|
|
|
|
//
|
|
|
|
ZeroMem (
|
|
|
|
(VOID*)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
|
|
|
|
(UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize)
|
|
|
|
);
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
(EFI_PHYSICAL_ADDRESS)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageBase),
|
|
|
|
(UINT64)(UINTN) PcdGet32 (PcdOvmfLockBoxStorageSize),
|
|
|
|
mS3Supported ? EfiACPIMemoryNVS : EfiBootServicesData
|
|
|
|
);
|
|
|
|
}
|
OvmfPkg: PlatformPei: account for TSEG size with PcdSmmSmramRequire set
PlatformPei calls GetSystemMemorySizeBelow4gb() in three locations:
- PublishPeiMemory(): on normal boot, the permanent PEI RAM is installed
so that it ends with the RAM below 4GB,
- QemuInitializeRam(): on normal boot, memory resource descriptor HOBs are
created for the RAM below 4GB; plus MTRR attributes are set
(independently of S3 vs. normal boot)
- MemMapInitialization(): an MMIO resource descriptor HOB is created for
PCI resource allocation, on normal boot, starting at max(RAM below 4GB,
2GB).
The first two of these is adjusted for the configured TSEG size, if
PcdSmmSmramRequire is set:
- In PublishPeiMemory(), the permanent PEI RAM is kept under TSEG.
- In QemuInitializeRam(), we must keep the DXE out of TSEG.
One idea would be to simply trim the [1MB .. LowerMemorySize] memory
resource descriptor HOB, leaving a hole for TSEG in the memory space
map.
The SMM IPL will however want to massage the caching attributes of the
SMRAM range that it loads the SMM core into, with
gDS->SetMemorySpaceAttributes(), and that won't work on a hole. So,
instead of trimming this range, split the TSEG area off, and report it
as a cacheable reserved memory resource.
Finally, since reserved memory can be allocated too, pre-allocate TSEG
in InitializeRamRegions(), after QemuInitializeRam() returns. (Note that
this step alone does not suffice without the resource descriptor HOB
trickery: if we omit that, then the DXE IPL PEIM fails to load and start
the DXE core.)
- In MemMapInitialization(), the start of the PCI MMIO range is not
affected.
We choose the largest option (8MB) for the default TSEG size. Michael
Kinney pointed out that the SMBASE relocation in PiSmmCpuDxeSmm consumes
SMRAM proportionally to the number of CPUs. From the three options
available, he reported that 8MB was both necessary and sufficient for the
SMBASE relocation to succeed with 255 CPUs:
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3137
- http://thread.gmane.org/gmane.comp.bios.edk2.devel/3020/focus=3177
Cc: Michael Kinney <michael.d.kinney@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Michael Kinney <michael.d.kinney@intel.com>
git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@19039 6f19259b-4bc3-4df7-8a09-765794883524
2015-11-30 19:41:33 +01:00
|
|
|
|
|
|
|
if (FeaturePcdGet (PcdSmmSmramRequire)) {
|
|
|
|
UINT32 TsegSize;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Make sure the TSEG area that we reported as a reserved memory resource
|
|
|
|
// cannot be used for reserved memory allocations.
|
|
|
|
//
|
|
|
|
TsegSize = FixedPcdGet8 (PcdQ35TsegMbytes) * SIZE_1MB;
|
|
|
|
BuildMemoryAllocationHob (
|
|
|
|
GetSystemMemorySizeBelow4gb() - TsegSize,
|
|
|
|
TsegSize,
|
|
|
|
EfiReservedMemoryType
|
|
|
|
);
|
|
|
|
}
|
2014-03-04 09:02:16 +01:00
|
|
|
}
|
2014-02-01 22:22:48 +01:00
|
|
|
}
|