64 bit bar sizing and MMIO allocation. The 64 bit window is placed above high memory, top down from the end of guest physical address space.
Signed-off-by: Xudong Hao xudong.hao@intel.com --- src/mtrr.c | 24 +++++++++----- src/pci.h | 2 +- src/pciinit.c | 99 ++++++++++++++++++++++++++++++++++++++------------------ src/util.h | 1 + 4 files changed, 85 insertions(+), 41 deletions(-)
diff --git a/src/mtrr.c b/src/mtrr.c index 0548043..05264c4 100644 --- a/src/mtrr.c +++ b/src/mtrr.c @@ -31,12 +31,26 @@ #define MTRR_MEMTYPE_WP 5 #define MTRR_MEMTYPE_WB 6
+int cpu_phys_addr(void) +{ + u32 eax, ebx, ecx, edx; + int phys_bits = 36; + cpuid(0x80000000u, &eax, &ebx, &ecx, &edx); + + if (eax >= 0x80000008) { + /* Get physical bits from leaf 0x80000008 (if available) */ + cpuid(0x80000008u, &eax, &ebx, &ecx, &edx); + phys_bits = eax & 0xff; + } + return phys_bits; +} + void mtrr_setup(void) { if (!CONFIG_MTRR_INIT || CONFIG_COREBOOT || usingXen()) return;
- u32 eax, ebx, ecx, edx, cpuid_features; + u32 eax, ebx, ecx, cpuid_features; cpuid(1, &eax, &ebx, &ecx, &cpuid_features); if (!(cpuid_features & CPUID_MTRR)) return; @@ -81,13 +95,7 @@ void mtrr_setup(void) }
// Set variable MTRRs - int phys_bits = 36; - cpuid(0x80000000u, &eax, &ebx, &ecx, &edx); - if (eax >= 0x80000008) { - /* Get physical bits from leaf 0x80000008 (if available) */ - cpuid(0x80000008u, &eax, &ebx, &ecx, &edx); - phys_bits = eax & 0xff; - } + int phys_bits = cpu_phys_addr(); u64 phys_mask = ((1ull << phys_bits) - 1); for (i=0; i<vcnt; i++) { wrmsr_smp(MTRRphysBase_MSR(i), 0); diff --git a/src/pci.h b/src/pci.h index a2a5a4c..0fa2ae8 100644 --- a/src/pci.h +++ b/src/pci.h @@ -53,7 +53,7 @@ struct pci_device { u8 secondary_bus; struct { u32 addr; - u32 size; + u64 size; int is64; } bars[PCI_NUM_REGIONS];
diff --git a/src/pciinit.c b/src/pciinit.c index 7d83368..a3a92ae 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -22,6 +22,7 @@ enum pci_region_type { PCI_REGION_TYPE_IO, PCI_REGION_TYPE_MEM, PCI_REGION_TYPE_PREFMEM, + PCI_REGION_TYPE_MEM_64, PCI_REGION_TYPE_COUNT, };
@@ -29,35 +30,40 @@ static const char *region_type_name[] = { [ PCI_REGION_TYPE_IO ] = "io", [ PCI_REGION_TYPE_MEM ] = "mem", [ PCI_REGION_TYPE_PREFMEM ] = "prefmem", + [ PCI_REGION_TYPE_MEM_64 ] = "mem64", };
struct pci_bus { struct { /* pci region stats */ - u32 count[32 - PCI_MEM_INDEX_SHIFT]; - u32 sum, max; + u32 count[64 - PCI_MEM_INDEX_SHIFT]; + u64 sum, max; /* seconday bus region sizes */ u32 size; /* pci region assignments */ - u32 bases[32 - PCI_MEM_INDEX_SHIFT]; - u32 base; + u64 bases[64 - PCI_MEM_INDEX_SHIFT]; + u64 base; } r[PCI_REGION_TYPE_COUNT]; struct pci_device *bus_dev; };
-static int pci_size_to_index(u32 size, enum pci_region_type type) +static u64 pci_size_to_index(u64 size, enum pci_region_type type) { - int index = __fls(size); + u64 index; int shift = (type == PCI_REGION_TYPE_IO) ? PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT;
+ if (((u32)(size >> 32) & ~0) == 0) + index = (u64)__fls((u32)size); + else + index = (u64)__fls((u32)(size >> 32)) + 32; if (index < shift) index = shift; index -= shift; return index; }
-static u32 pci_index_to_size(int index, enum pci_region_type type) +static u64 pci_index_to_size(u64 index, enum pci_region_type type) { int shift = (type == PCI_REGION_TYPE_IO) ? PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; @@ -69,6 +75,8 @@ static enum pci_region_type pci_addr_to_type(u32 addr) { if (addr & PCI_BASE_ADDRESS_SPACE_IO) return PCI_REGION_TYPE_IO; + if (addr & PCI_BASE_ADDRESS_MEM_TYPE_64) + return PCI_REGION_TYPE_MEM_64; if (addr & PCI_BASE_ADDRESS_MEM_PREFETCH) return PCI_REGION_TYPE_PREFMEM; return PCI_REGION_TYPE_MEM; @@ -330,7 +338,7 @@ static u32 pci_size_roundup(u32 size) }
static void -pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u32 *size) +pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u64 *size) { u32 ofs = pci_bar(pci, bar); u16 bdf = pci->bdf; @@ -352,9 +360,9 @@ pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u32 *size) *size = (~(*val & mask)) + 1; }
-static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) +static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u64 size) { - u32 index; + u64 index;
index = pci_size_to_index(size, type); size = pci_index_to_size(index, type); @@ -378,17 +386,27 @@ static void pci_bios_check_devices(struct pci_bus *busses) struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)]; int i; for (i = 0; i < PCI_NUM_REGIONS; i++) { - u32 val, size; + u32 val, val_upper; + u64 size; + u64 size_upper; + u8 is64; + pci_bios_get_bar(pci, i, &val, &size); - if (val == 0) + is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && + (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) + == PCI_BASE_ADDRESS_MEM_TYPE_64); + + if (is64) { + pci_bios_get_bar(pci, i + 1, &val_upper, &size_upper); + size = (size_upper << 32) | size; + } + size &= ~(size - 1); + if (size == 0) continue; - pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); pci->bars[i].addr = val; pci->bars[i].size = size; - pci->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && - (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) - == PCI_BASE_ADDRESS_MEM_TYPE_64); + pci->bars[i].is64 = is64;
if (pci->bars[i].is64) i++; @@ -410,7 +428,7 @@ static void pci_bios_check_devices(struct pci_bus *busses) if (s->r[type].size < limit) s->r[type].size = limit; s->r[type].size = pci_size_roundup(s->r[type].size); - pci_bios_bus_reserve(parent, type, s->r[type].size); + pci_bios_bus_reserve(parent, type, (u64)s->r[type].size); } dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", secondary_bus, @@ -423,11 +441,12 @@ static void pci_bios_check_devices(struct pci_bus *busses) #define ROOT_BASE(top, sum, max) ALIGN_DOWN((top)-(sum),(max) ?: 1)
// Setup region bases (given the regions' size and alignment) -static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end) +static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end, u64 high_start, u64 high_end) { bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
int reg1 = PCI_REGION_TYPE_PREFMEM, reg2 = PCI_REGION_TYPE_MEM; + int reg3 = PCI_REGION_TYPE_MEM_64; if (bus->r[reg1].sum < bus->r[reg2].sum) { // Swap regions so larger area is more likely to align well. reg1 = PCI_REGION_TYPE_MEM; @@ -436,7 +455,8 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end) bus->r[reg2].base = ROOT_BASE(end, bus->r[reg2].sum, bus->r[reg2].max); bus->r[reg1].base = ROOT_BASE(bus->r[reg2].base, bus->r[reg1].sum , bus->r[reg1].max); - if (bus->r[reg1].base < start) + bus->r[reg3].base = ROOT_BASE(high_end, bus->r[reg3].sum, bus->r[reg3].max); + if (bus->r[reg1].base < start || bus->r[reg3].base < high_start) // Memory range requested is larger than available. return -1; return 0; @@ -449,29 +469,34 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end)
static void pci_bios_init_bus_bases(struct pci_bus *bus) { - u32 base, newbase, size; + u64 base, newbase, size; int type, i;
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type], - bus->r[type].max, bus->r[type].sum, bus->r[type].base); + dprintf(1, " type %s max 0x%08x%08x sum 0x%08x%08x base 0x%08x%08x\n", + region_type_name[type], (u32)(bus->r[type].max >> 32), + (u32)(bus->r[type].max), (u32)(bus->r[type].sum >> 32), + (u32)(bus->r[type].sum), (u32)(bus->r[type].base >> 32), + (u32)(bus->r[type].base)); base = bus->r[type].base; for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) { size = pci_index_to_size(i, type); if (!bus->r[type].count[i]) continue; newbase = base + size * bus->r[type].count[i]; - dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", - size, bus->r[type].count[i], base, newbase - 1); + dprintf(1, " size 0x%08x%08x: %d bar(s), 0x%08x%08x -> 0x%08x%08x\n", + (u32)(size >> 32), (u32)size, bus->r[type].count[i], + (u32)(base >> 32), (u32)base, (u32)((newbase - 1) >> 32), + (u32)(newbase -1)); bus->r[type].bases[i] = base; base = newbase; } } }
-static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) +static u64 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u64 size) { - u32 index, addr; + u64 index, addr;
index = pci_size_to_index(size, type); addr = bus->r[type].bases[index]; @@ -500,7 +525,7 @@ static void pci_bios_map_devices(struct pci_bus *busses) int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { s->r[type].base = pci_bios_bus_get_addr( - parent, type, s->r[type].size); + parent, type, (u64)s->r[type].size); } dprintf(1, "PCI: init bases bus %d (secondary)\n", secondary_bus); pci_bios_init_bus_bases(s); @@ -539,14 +564,21 @@ static void pci_bios_map_devices(struct pci_bus *busses) if (pci->bars[i].addr == 0) continue;
+ u64 addr_64; int type = pci_addr_to_type(pci->bars[i].addr); - u32 addr = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); - dprintf(1, " bar %d, addr %x, size %x [%s]\n", - i, addr, pci->bars[i].size, region_type_name[type]); + addr_64 = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); + + u32 addr = (u32)addr_64; + u32 addr_upper = (u32)(addr_64 >> 32); + dprintf(1, " bar %d, addr %x, size 0x%08x%08x [%s]\n", + i, addr, (u32)(pci->bars[i].size >> 32), + (u32)(pci->bars[i].size), region_type_name[type]); pci_set_io_region_addr(pci, i, addr);
- if (pci->bars[i].is64) + if (pci->bars[i].is64) { + pci_set_io_region_addr(pci, i + 1, addr_upper); i++; + } } } } @@ -569,6 +601,8 @@ pci_setup(void)
u32 start = BUILD_PCIMEM_START; u32 end = BUILD_PCIMEM_END; + u64 high_start = RamSizeOver4G + 0x100000000ull; + u64 high_end = 1ull << cpu_phys_addr();
dprintf(1, "=== PCI bus & bridge init ===\n"); if (pci_probe_host() != 0) { @@ -587,7 +621,8 @@ pci_setup(void) } memset(busses, 0, sizeof(*busses) * (MaxPCIBus + 1)); pci_bios_check_devices(busses); - if (pci_bios_init_root_regions(&busses[0], start, end) != 0) { + if (pci_bios_init_root_regions(&busses[0], start, end, high_start, + high_end) != 0) { panic("PCI: out of address space\n"); }
diff --git a/src/util.h b/src/util.h index eecba8b..4eacb36 100644 --- a/src/util.h +++ b/src/util.h @@ -475,6 +475,7 @@ static inline void free(void *data) { }
// mtrr.c +int cpu_phys_addr(void); void mtrr_setup(void);
// romlayout.S
On 11/02/12 06:42, Xudong Hao wrote:
64 bit bar sizing and MMIO allocation. The 64 bit window is placed above high memory, top down from the end of guest physical address space.
What problem are you trying to fix? The existing code should handle 64bit bars just fine. By default they are placed below 4G, though, for compatibility reasons (to keep old 32bit guests happy). When running out of address space, seabios will try to map them above 4G to make room below 4G.
Mapping your 64bit PCI bars above 4G unconditionally (for testing or other reasons) can simply be done this way:
--- a/src/pciinit.c +++ b/src/pciinit.c @@ -599,7 +599,7 @@ static void pci_bios_map_devices(struct pci_bus *busses) { pcimem_start = RamSize;
- if (pci_bios_init_root_regions(busses)) { + if (1 /* pci_bios_init_root_regions(busses) */) { struct pci_region r64_mem, r64_pref; r64_mem.list = NULL; r64_pref.list = NULL;
We might want to add a config option for this.
cheers, Gerd
-----Original Message----- From: Gerd Hoffmann [mailto:kraxel@redhat.com] Sent: Friday, November 02, 2012 5:00 PM To: Hao, Xudong Cc: kevin@koconnor.net; seabios@seabios.org; kvm@vger.kernel.org; avi@redhat.com Subject: Re: [PATCH] seabios/pci: enable 64 bit bar on seabios
On 11/02/12 06:42, Xudong Hao wrote:
64 bit bar sizing and MMIO allocation. The 64 bit window is placed above high memory, top down from the end of guest physical address space.
What problem are you trying to fix? The existing code should handle 64bit bars just fine. By default they are placed below 4G, though, for compatibility reasons (to keep old 32bit guests happy). When running out of address space, seabios will try to map them above 4G to make room below 4G.
I just want to enable 64 bit bars for KVM usage. Seabios 1.7.0, which is used in the current qemu-kvm, does not handle 64 bit bars yet. I cloned the seabios code from kernel.org (which seems to have no 64 bit bar support), but I had not noticed the tree on http://git.qemu.org/, which already handles 64 bit bars. So you may ignore this patch.
Btw, when will the latest seabios (especially the 64 bit bar support) be included in qemu-kvm?
Mapping your 64bit PCI bars above 4G unconditionally (for testing or other reasons) can simply be done this way:
--- a/src/pciinit.c +++ b/src/pciinit.c @@ -599,7 +599,7 @@ static void pci_bios_map_devices(struct pci_bus *busses) { pcimem_start = RamSize;
- if (pci_bios_init_root_regions(busses)) {
+ if (1 /* pci_bios_init_root_regions(busses) */) { struct pci_region r64_mem, r64_pref; r64_mem.list = NULL; r64_pref.list = NULL;
We might want to add a config option for this.
cheers, Gerd
On Fri, Nov 02, 2012 at 01:42:08PM +0800, Xudong Hao wrote:
64 bit bar sizing and MMIO allocation. The 64 bit window is placed above high memory, top down from the end of guest physical address space.
Your patch seems to be against an old version of SeaBIOS. The latest SeaBIOS already supports 64bit pci bars.
-Kevin