FWD
----- Forwarded message from Alexey Korolev alexey.korolev@endace.com -----
Subject: [PATCH] [Seabios] Over 4GB address ranges for 64bit PCI BARs X-Mailer: Evolution 2.24.3
Hi,
We have seen some issues with 64bit PCI address and large BAR region support in seabios.
On attempting to register a 64bit BAR of 1GB size we had some strange failures in qemu. After some debugging we find out that source of the issue was in seabios PCI enumeration code.
The issue takes place because of u32 overflow in pciinit.c. …......... if (ALIGN(*paddr, size) + size >= BUILD_PCIPREFMEM_END) { dprintf(1, "prefmem region of (bdf 0x%x bar %d) can't be mapped. " "decrease BUILD_PCIMEM_SIZE and recompile. size %x\n", bdf, region_num, BUILD_PCIPREFMEM_SIZE); size = 0; } …........... if (size > 0) { *paddr = ALIGN(*paddr, size); pci_set_io_region_addr(bdf, region_num, *paddr); *paddr += size; }
If size is greater than 0xFFFFFFFF ??? BUILD_PCIPREFMEM_START (256MB), this call ALIGN(*paddr, size) will always return 0. The protection test fails, size remains > 0, and guest memory mapping will be corrupted.
We have found that a very similar problem was discussed previously:
http://www.mail-archive.com/qemu-devel@nongnu.org/msg38090.html
The discussion also touches on the question of 64bit addressing for PCI BARs. In the current implementation regardless of whether the PCI BAR type is 64bit or not the addressable range will be limited to the first 4GB. We also want to have "true" 64bit addressable range for PCI BARs.
So to solve these issues some changes were made. Tracking for overflows has been added as well as support for 64bit range. To support the full 64bit address range a pci_bios_64bit_addr variable has been added. The 64bit range can be used only if BAR has PCI_BASE_ADDRESS_MEM_TYPE_64 attribute and the given region doesn't fit in first 4GB. The patch has been tested on Linux and BSD guest OS's and it appears to work fine.
Signed-off-by Alexey Korolev Alexey.Korolev@endace.com Signed-off-by Stephen Donelly Stephen.Donelly@endace.com --- pciinit.c | 83 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 40 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 0346423..86c8137 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -20,6 +20,7 @@ static void pci_bios_init_device_in_bus(int bus); static u32 pci_bios_io_addr; static u32 pci_bios_mem_addr; static u32 pci_bios_prefmem_addr; +static u64 pci_bios_64bit_addr; /* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -56,9 +57,11 @@ static int pci_bios_allocate_region(u16 bdf, int region_num) { u32 *paddr; u32 ofs = pci_bar(bdf, region_num); - u32 old = pci_config_readl(bdf, ofs); u32 mask; + u32 limit_addr; + int is_64bit; + if (region_num == PCI_ROM_SLOT) { mask = PCI_ROM_ADDRESS_MASK; pci_config_writel(bdf, ofs, mask); @@ -73,50 +76,49 @@ static int pci_bios_allocate_region(u16 bdf, int region_num) pci_config_writel(bdf, ofs, old);
u32 size = (~(val & mask)) + 1; - if (val != 0) { - if (val & PCI_BASE_ADDRESS_SPACE_IO) { - paddr = &pci_bios_io_addr; - if (ALIGN(*paddr, size) + size >= 64 * 1024) { - dprintf(1, - "io region of (bdf 0x%x bar %d) can't be mapped.\n", - bdf, region_num); - size = 0; - } - } else if ((val & PCI_BASE_ADDRESS_MEM_PREFETCH) && - /* keep behaviour on bus = 0 */ - pci_bdf_to_bus(bdf) != 0 && - /* If pci_bios_prefmem_addr == 0, keep old behaviour */ - pci_bios_prefmem_addr != 0) { - paddr = &pci_bios_prefmem_addr; - if (ALIGN(*paddr, size) + size >= BUILD_PCIPREFMEM_END) { - dprintf(1, - "prefmem region of (bdf 0x%x bar %d) can't be mapped. " - "decrease BUILD_PCIMEM_SIZE and recompile. size %x\n", - bdf, region_num, BUILD_PCIPREFMEM_SIZE); - size = 0; - } - } else { - paddr = &pci_bios_mem_addr; - if (ALIGN(*paddr, size) + size >= BUILD_PCIMEM_END) { + if (!val || !size) + return 0; + + is_64bit = !(val & PCI_BASE_ADDRESS_SPACE_IO) && + (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64; + + if (val & PCI_BASE_ADDRESS_SPACE_IO) { + paddr = &pci_bios_io_addr; + limit_addr = 64 * 1024; + } else if ((val & PCI_BASE_ADDRESS_MEM_PREFETCH) && + /* keep behaviour on bus = 0 */ + pci_bdf_to_bus(bdf) != 0 && + /* If pci_bios_prefmem_addr == 0, keep old behaviour */ + pci_bios_prefmem_addr != 0) { + paddr = &pci_bios_prefmem_addr; + limit_addr = BUILD_PCIPREFMEM_END; + } else { + paddr = &pci_bios_mem_addr; + limit_addr = BUILD_PCIMEM_END; + } + + /* The region is out of allowed 32bit mapping range */ + if ((ALIGN(*paddr, size) + size >= limit_addr) || + (ALIGN(*paddr, size) + size <= *paddr)) { + if (!is_64bit) { dprintf(1, "mem region of (bdf 0x%x bar %d) can't be mapped. " - "increase BUILD_PCIMEM_SIZE and recompile. size %x\n", - bdf, region_num, BUILD_PCIMEM_SIZE); - size = 0; + "Region size %x\n", bdf, region_num, size); + return 0; } - } - if (size > 0) { - *paddr = ALIGN(*paddr, size); - pci_set_io_region_addr(bdf, region_num, *paddr); - *paddr += size; - } + pci_bios_64bit_addr = ALIGN(pci_bios_64bit_addr, size); + pci_set_io_region_addr(bdf, region_num, (u32)pci_bios_64bit_addr); + pci_set_io_region_addr(bdf, region_num + 1, + (u32)(pci_bios_64bit_addr >> 32)); + pci_bios_64bit_addr += size; + return is_64bit; }
- int is_64bit = !(val & PCI_BASE_ADDRESS_SPACE_IO) && - (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64; - if (is_64bit && size > 0) { - pci_config_writel(bdf, ofs + 4, 0); - } + *paddr = ALIGN(*paddr, size); + pci_set_io_region_addr(bdf, region_num, *paddr); + if (is_64bit) + pci_set_io_region_addr(bdf, region_num + 1, 0); + *paddr += size; return is_64bit; }
@@ -409,6 +411,7 @@ pci_setup(void) pci_bios_io_addr = 0xc000; pci_bios_mem_addr = BUILD_PCIMEM_START; pci_bios_prefmem_addr = BUILD_PCIPREFMEM_START; + pci_bios_64bit_addr = ALIGN(RamSizeOver4G + 0x100000000ull, 0x100000000ull);
pci_bios_init_bus();
----- End forwarded message -----