Hi,
The effort to add support for 64bit pci bars to qemu seems to have stalled somehow. Let's resume it ;)
Here is a patch collection, largely rewritten from scratch, which improves seabios 64bit support. One big patch (#3) adds 64bit math everywhere. The other big patch (#5) adds support for resource allocation above 4G. The remaining patches carry bugfixes and little bits such as support for printing 64bit hex numbers.
cheers, Gerd
Gerd Hoffmann (5): 64bit hex printf support pci: split device discovery into multiple steps pci: 64bit support. pci: bridges can have two regions too pci: add prefmem64 region type.
src/acpi-dsdt.dsl | 7 ++ src/config.h | 2 + src/output.c | 32 ++++++-- src/pci.h | 14 ++- src/pciinit.c | 245 +++++++++++++++++++++++++++++++++++++---------------- 5 files changed, 217 insertions(+), 83 deletions(-)
Add support for printing 64bit hex numbers to src/output.c
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/output.c | 32 ++++++++++++++++++++++++++------ 1 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/src/output.c b/src/output.c index bdde7cc..0873776 100644 --- a/src/output.c +++ b/src/output.c @@ -196,15 +196,15 @@ putsinglehex(struct putcinfo *action, u32 val)
// Output an integer in hexadecimal. static void -puthex(struct putcinfo *action, u32 val, int width, int spacepad) +puthex(struct putcinfo *action, u64 val, int width, int spacepad) { if (!width) { - u32 tmp = val; + u64 tmp = val; width = 1; while (tmp >>= 4) width++; } else if (spacepad) { - u32 tmp = val; + u64 tmp = val; u32 count = 1; while (tmp >>= 4) count++; @@ -217,7 +217,15 @@ puthex(struct putcinfo *action, u32 val, int width, int spacepad) }
switch (width) { - default: putsinglehex(action, (val >> 28) & 0xf); + default: putsinglehex(action, (val >> 60) & 0xf); + case 15: putsinglehex(action, (val >> 56) & 0xf); + case 14: putsinglehex(action, (val >> 52) & 0xf); + case 13: putsinglehex(action, (val >> 48) & 0xf); + case 12: putsinglehex(action, (val >> 44) & 0xf); + case 11: putsinglehex(action, (val >> 40) & 0xf); + case 10: putsinglehex(action, (val >> 36) & 0xf); + case 9: putsinglehex(action, (val >> 32) & 0xf); + case 8: putsinglehex(action, (val >> 28) & 0xf); case 7: putsinglehex(action, (val >> 24) & 0xf); case 6: putsinglehex(action, (val >> 20) & 0xf); case 5: putsinglehex(action, (val >> 16) & 0xf); @@ -249,6 +257,7 @@ bvprintf(struct putcinfo *action, const char *fmt, va_list args) const char *n = s+1; int field_width = 0; int spacepad = 1; + int is64 = 0; for (;;) { c = GET_GLOBAL(*(u8*)n); if (!isdigit(c)) @@ -264,6 +273,12 @@ bvprintf(struct putcinfo *action, const char *fmt, va_list args) n++; c = GET_GLOBAL(*(u8*)n); } + if (c == 'l') { + // long long format indicator + is64 = 1; + n++; + c = GET_GLOBAL(*(u8*)n); + } s32 val; const char *sarg; switch (c) { @@ -289,8 +304,13 @@ bvprintf(struct putcinfo *action, const char *fmt, va_list args) field_width = 8; spacepad = 0; case 'x': - val = va_arg(args, s32); - puthex(action, val, field_width, spacepad); + if (is64) { + u64 val64 = va_arg(args, u64); + puthex(action, val64, field_width, spacepad); + } else { + u32 val32 = va_arg(args, u32); + puthex(action, val32, field_width, spacepad); + } break; case 'c': val = va_arg(args, int);
First bridge init, next pci bar discovery, finally pci bar resource allocation. Needed because we need to figure out whether we can map 64bit bars above 4G before doing resource allocation.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pciinit.c | 39 ++++++++++++++++++++++++++++++++------- 1 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 9f3fdd4..652564c 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -368,22 +368,28 @@ static void pci_bios_check_devices(struct pci_bus *busses) { dprintf(1, "PCI: check devices\n");
- // Calculate resources needed for regular (non-bus) devices. struct pci_device *pci; + struct pci_bus *bus; + int i; + + // init pci bridges foreachpci(pci) { - if (pci->class == PCI_CLASS_BRIDGE_PCI) { - busses[pci->secondary_bus].bus_dev = pci; + if (pci->class != PCI_CLASS_BRIDGE_PCI) + continue; + bus = &busses[pci->secondary_bus]; + bus->bus_dev = pci; + } + + // discover pci bars + foreachpci(pci) { + if (pci->class == PCI_CLASS_BRIDGE_PCI) continue; - } - struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)]; - int i; for (i = 0; i < PCI_NUM_REGIONS; i++) { u32 val, size; pci_bios_get_bar(pci, i, &val, &size); if (val == 0) continue;
- pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); pci->bars[i].addr = val; pci->bars[i].size = size; pci->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && @@ -395,6 +401,25 @@ static void pci_bios_check_devices(struct pci_bus *busses) } }
+ // alloc ressources for pci bars + foreachpci(pci) { + if (pci->class == PCI_CLASS_BRIDGE_PCI) + continue; + bus = &busses[pci_bdf_to_bus(pci->bdf)]; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + enum pci_region_type type; + if (pci->bars[i].addr == 0) + continue; + + type = pci_addr_to_type(pci->bars[i].addr); + pci_bios_bus_reserve(bus, type, + pci->bars[i].size); + + if (pci->bars[i].is64) + i++; + } + } + // Propagate required bus resources to parent busses. int secondary_bus; for (secondary_bus=MaxPCIBus; secondary_bus>0; secondary_bus--) {
Makes pciinit.c 64bit aware. Use 64bit everywhere. Support discovery and configuration of 64bit bars, with non-zero upper32 bits. While at it, introduce a struct pci_bar which can be passed around easily.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pci.h | 14 +++++--- src/pciinit.c | 105 ++++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 75 insertions(+), 44 deletions(-)
diff --git a/src/pci.h b/src/pci.h index a2a5a4c..46f162e 100644 --- a/src/pci.h +++ b/src/pci.h @@ -39,6 +39,14 @@ void pci_config_maskw(u16 bdf, u32 addr, u16 off, u16 on); struct pci_device *pci_find_device(u16 vendid, u16 devid); struct pci_device *pci_find_class(u16 classid);
+struct pci_bar { + u64 addr; + u64 size; + int is64:1, + ismem:1, + isprefetch:1; +}; + struct pci_device { u16 bdf; u8 rootbus; @@ -51,11 +59,7 @@ struct pci_device { u8 prog_if, revision; u8 header_type; u8 secondary_bus; - struct { - u32 addr; - u32 size; - int is64; - } bars[PCI_NUM_REGIONS]; + struct pci_bar bars[PCI_NUM_REGIONS];
// Local information on device. int have_driver; diff --git a/src/pciinit.c b/src/pciinit.c index 652564c..7de72f4 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -35,12 +35,12 @@ struct pci_bus { struct { /* pci region stats */ u32 count[32 - PCI_MEM_INDEX_SHIFT]; - u32 sum, max; + u64 sum, max; /* seconday bus region sizes */ - u32 size; + u64 size; /* pci region assignments */ - u32 bases[32 - PCI_MEM_INDEX_SHIFT]; - u32 base; + u64 bases[32 - PCI_MEM_INDEX_SHIFT]; + u64 base; } r[PCI_REGION_TYPE_COUNT]; struct pci_device *bus_dev; }; @@ -86,9 +86,15 @@ static u32 pci_bar(struct pci_device *pci, int region_num) }
static void -pci_set_io_region_addr(struct pci_device *pci, int region_num, u32 addr) +pci_set_io_region_addr(struct pci_device *pci, int region_num, + u64 addr, int is64) { - pci_config_writel(pci->bdf, pci_bar(pci, region_num), addr); + u32 ofs = pci_bar(pci, region_num); + + pci_config_writel(pci->bdf, ofs, addr & 0xffffffff); + if (is64) { + pci_config_writel(pci->bdf, ofs + 4, addr >> 32); + } }
@@ -141,10 +147,10 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { static void storage_ide_init(struct pci_device *pci, void *arg) { /* IDE: we map it as in ISA mode */ - pci_set_io_region_addr(pci, 0, PORT_ATA1_CMD_BASE); - pci_set_io_region_addr(pci, 1, PORT_ATA1_CTRL_BASE); - pci_set_io_region_addr(pci, 2, PORT_ATA2_CMD_BASE); - pci_set_io_region_addr(pci, 3, PORT_ATA2_CTRL_BASE); + pci_set_io_region_addr(pci, 0, PORT_ATA1_CMD_BASE, 0); + pci_set_io_region_addr(pci, 1, PORT_ATA1_CTRL_BASE, 0); + pci_set_io_region_addr(pci, 2, PORT_ATA2_CMD_BASE, 0); + pci_set_io_region_addr(pci, 3, PORT_ATA2_CTRL_BASE, 0); }
/* PIIX3/PIIX4 IDE */ @@ -158,13 +164,13 @@ static void piix_ide_init(struct pci_device *pci, void *arg) static void pic_ibm_init(struct pci_device *pci, void *arg) { /* PIC, IBM, MPIC & MPIC2 */ - pci_set_io_region_addr(pci, 0, 0x80800000 + 0x00040000); + pci_set_io_region_addr(pci, 0, 0x80800000 + 0x00040000, 0); }
static void apple_macio_init(struct pci_device *pci, void *arg) { /* macio bridge */ - pci_set_io_region_addr(pci, 0, 0x80800000); + pci_set_io_region_addr(pci, 0, 0x80800000, 0); }
static const struct pci_device_id pci_class_tbl[] = { @@ -330,14 +336,14 @@ static u32 pci_size_roundup(u32 size) }
static void -pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u32 *size) +pci_bios_get_bar(struct pci_device *pci, int nr, struct pci_bar *bar) { - u32 ofs = pci_bar(pci, bar); + u32 ofs = pci_bar(pci, nr); u16 bdf = pci->bdf; u32 old = pci_config_readl(bdf, ofs); - u32 mask; + u64 mask;
- if (bar == PCI_ROM_SLOT) { + if (nr == PCI_ROM_SLOT) { mask = PCI_ROM_ADDRESS_MASK; pci_config_writel(bdf, ofs, mask); } else { @@ -347,12 +353,38 @@ pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u32 *size) mask = PCI_BASE_ADDRESS_MEM_MASK; pci_config_writel(bdf, ofs, ~0); } - *val = pci_config_readl(bdf, ofs); + bar->addr = pci_config_readl(bdf, ofs); pci_config_writel(bdf, ofs, old); - *size = (~(*val & mask)) + 1; + if ((bar->addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) { + bar->ismem = 1; + if ((bar->addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == + PCI_BASE_ADDRESS_MEM_TYPE_64) + bar->is64 = 1; + if (bar->addr & PCI_BASE_ADDRESS_MEM_PREFETCH) + bar->isprefetch = 1; + } + if (bar->is64) { + u32 hold, high; + hold = pci_config_readl(bdf, ofs + 4); + pci_config_writel(bdf, ofs + 4, ~0); + high = pci_config_readl(bdf, ofs + 4); + pci_config_writel(bdf, ofs + 4, hold); + bar->addr |= ((u64)high << 32); + mask |= ((u64)0xffffffff << 32); + bar->size = (~(bar->addr & mask)) + 1; + } else if (bar->addr != 0) { + bar->size = (~(bar->addr & mask) & 0xffffffff) + 1; + } + if (bar->addr != 0) { + dprintf(1, " %d: addr %llx size %llx %s %s%s\n", nr, + bar->addr, bar->size, + bar->ismem ? "mem" : "io", + bar->is64 ? "64bit" : "32bit", + bar->isprefetch ? " prefetchable" : ""); + } }
-static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) +static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u64 size) { u32 index;
@@ -384,18 +416,14 @@ static void pci_bios_check_devices(struct pci_bus *busses) foreachpci(pci) { if (pci->class == PCI_CLASS_BRIDGE_PCI) continue; + dprintf(1, "PCI: check device bdf=%02x:%02x.%x\n", + pci_bdf_to_bus(pci->bdf), pci_bdf_to_dev(pci->bdf), + pci_bdf_to_fn(pci->bdf)); for (i = 0; i < PCI_NUM_REGIONS; i++) { - u32 val, size; - pci_bios_get_bar(pci, i, &val, &size); - if (val == 0) + pci_bios_get_bar(pci, i, &pci->bars[i]); + if (pci->bars[i].addr == 0) continue;
- pci->bars[i].addr = val; - pci->bars[i].size = size; - pci->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && - (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) - == PCI_BASE_ADDRESS_MEM_TYPE_64); - if (pci->bars[i].is64) i++; } @@ -429,7 +457,7 @@ static void pci_bios_check_devices(struct pci_bus *busses) struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)]; int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - u32 limit = (type == PCI_REGION_TYPE_IO) ? + u64 limit = (type == PCI_REGION_TYPE_IO) ? PCI_BRIDGE_IO_MIN : PCI_BRIDGE_MEM_MIN; s->r[type].size = s->r[type].sum; if (s->r[type].size < limit) @@ -437,7 +465,7 @@ static void pci_bios_check_devices(struct pci_bus *busses) s->r[type].size = pci_size_roundup(s->r[type].size); pci_bios_bus_reserve(parent, type, s->r[type].size); } - dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", + dprintf(1, "PCI: secondary bus %d sizes: io %llx, mem %llx, prefmem %llx\n", secondary_bus, s->r[PCI_REGION_TYPE_IO].size, s->r[PCI_REGION_TYPE_MEM].size, @@ -474,11 +502,11 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end)
static void pci_bios_init_bus_bases(struct pci_bus *bus) { - u32 base, newbase, size; + u64 base, newbase, size; int type, i;
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type], + dprintf(1, " type %s max %llx sum %llx base %llx\n", region_type_name[type], bus->r[type].max, bus->r[type].sum, bus->r[type].base); base = bus->r[type].base; for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) { @@ -486,7 +514,7 @@ static void pci_bios_init_bus_bases(struct pci_bus *bus) if (!bus->r[type].count[i]) continue; newbase = base + size * bus->r[type].count[i]; - dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", + dprintf(1, " size %8llx: %d bar(s), %8llx -> %8llx\n", size, bus->r[type].count[i], base, newbase - 1); bus->r[type].bases[i] = base; base = newbase; @@ -530,8 +558,8 @@ static void pci_bios_map_devices(struct pci_bus *busses) dprintf(1, "PCI: init bases bus %d (secondary)\n", secondary_bus); pci_bios_init_bus_bases(s);
- u32 base = s->r[PCI_REGION_TYPE_IO].base; - u32 limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; + u64 base = s->r[PCI_REGION_TYPE_IO].base; + u64 limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; pci_config_writeb(bdf, PCI_IO_BASE, base >> PCI_IO_SHIFT); pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT); @@ -565,14 +593,13 @@ static void pci_bios_map_devices(struct pci_bus *busses) continue;
int type = pci_addr_to_type(pci->bars[i].addr); - u32 addr = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); - dprintf(1, " bar %d, addr %x, size %x [%s]\n", + u64 addr = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); + dprintf(1, " bar %d, addr %llx, size %llx [%s]\n", i, addr, pci->bars[i].size, region_type_name[type]); - pci_set_io_region_addr(pci, i, addr); + pci_set_io_region_addr(pci, i, addr, pci->bars[i].is64);
if (pci->bars[i].is64) { i++; - pci_set_io_region_addr(pci, i, 0); } } }
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pciinit.c | 20 ++++++++------------ 1 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 7de72f4..a98f6e3 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -402,7 +402,7 @@ static void pci_bios_check_devices(struct pci_bus *busses)
struct pci_device *pci; struct pci_bus *bus; - int i; + int i, num_regions;
// init pci bridges foreachpci(pci) { @@ -414,12 +414,11 @@ static void pci_bios_check_devices(struct pci_bus *busses)
// discover pci bars foreachpci(pci) { - if (pci->class == PCI_CLASS_BRIDGE_PCI) - continue; + num_regions = (pci->class == PCI_CLASS_BRIDGE_PCI) ? 2 : PCI_NUM_REGIONS; dprintf(1, "PCI: check device bdf=%02x:%02x.%x\n", pci_bdf_to_bus(pci->bdf), pci_bdf_to_dev(pci->bdf), pci_bdf_to_fn(pci->bdf)); - for (i = 0; i < PCI_NUM_REGIONS; i++) { + for (i = 0; i < num_regions; i++) { pci_bios_get_bar(pci, i, &pci->bars[i]); if (pci->bars[i].addr == 0) continue; @@ -431,10 +430,9 @@ static void pci_bios_check_devices(struct pci_bus *busses)
// alloc ressources for pci bars foreachpci(pci) { - if (pci->class == PCI_CLASS_BRIDGE_PCI) - continue; + num_regions = (pci->class == PCI_CLASS_BRIDGE_PCI) ? 2 : PCI_NUM_REGIONS; bus = &busses[pci_bdf_to_bus(pci->bdf)]; - for (i = 0; i < PCI_NUM_REGIONS; i++) { + for (i = 0; i < num_regions; i++) { enum pci_region_type type; if (pci->bars[i].addr == 0) continue; @@ -543,7 +541,7 @@ static void pci_bios_map_devices(struct pci_bus *busses) pci_bios_init_bus_bases(&busses[0]);
// Map regions on each secondary bus. - int secondary_bus; + int secondary_bus, num_regions, i; for (secondary_bus=1; secondary_bus<=MaxPCIBus; secondary_bus++) { struct pci_bus *s = &busses[secondary_bus]; if (!s->bus_dev) @@ -581,14 +579,12 @@ static void pci_bios_map_devices(struct pci_bus *busses) // Map regions on each device. struct pci_device *pci; foreachpci(pci) { - if (pci->class == PCI_CLASS_BRIDGE_PCI) - continue; + num_regions = (pci->class == PCI_CLASS_BRIDGE_PCI) ? 2 : PCI_NUM_REGIONS; u16 bdf = pci->bdf; dprintf(1, "PCI: map device bdf=%02x:%02x.%x\n" , pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf)); struct pci_bus *bus = &busses[pci_bdf_to_bus(bdf)]; - int i; - for (i = 0; i < PCI_NUM_REGIONS; i++) { + for (i = 0; i < num_regions; i++) { if (pci->bars[i].addr == 0) continue;
This patch adds a prefmem64 region type. 64bit prefmem pci bars are assigned to that region if the device is either on the root bus or if they are behind a 64bit capable bridge and all other prefmem bars behind that bridge are 64bit capable too.
The patch also changes the bridge resource allocation: Unused memory windows are disabled: If none of the devices connected to the bridge has -- say -- I/O bars, then the I/O memory window is turned off. This implies that bridges without devices don't get any resources assigned. The pci bridge spec wants us to behave that way (figured out while looking at drivers/pci/setup-bus.c in the linux kernel source tree). It also avoids assigning resources for both prefmem and prefmem64 ;)
With this patch applied seabios is able to map 64bit bars above 4G.
TODO: detect 64bit capable bridges.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/acpi-dsdt.dsl | 7 +++ src/config.h | 2 + src/pciinit.c | 107 +++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 89 insertions(+), 27 deletions(-)
diff --git a/src/acpi-dsdt.dsl b/src/acpi-dsdt.dsl index 7082b65..c17e947 100644 --- a/src/acpi-dsdt.dsl +++ b/src/acpi-dsdt.dsl @@ -175,6 +175,13 @@ DefinitionBlock ( 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic) + QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, + 0x00000000, // Address Space Granularity + 0x8000000000, // Address Range Minimum + 0xFFFFFFFFFF, // Address Range Maximum + 0x00000000, // Address Translation Offset + 0x8000000000, // Address Length + ,, , AddressRangeMemory, TypeStatic) }) } } diff --git a/src/config.h b/src/config.h index b0187a4..5850476 100644 --- a/src/config.h +++ b/src/config.h @@ -47,6 +47,8 @@
#define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_END 0xfec00000 /* IOAPIC is mapped at */ +#define BUILD_PCIMEM64_START 0x8000000000ULL +#define BUILD_PCIMEM64_END 0xFFFFFFFFFFULL
#define BUILD_IOAPIC_ADDR 0xfec00000 #define BUILD_HPET_ADDRESS 0xfed00000 diff --git a/src/pciinit.c b/src/pciinit.c index a98f6e3..927e59a 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -22,13 +22,15 @@ enum pci_region_type { PCI_REGION_TYPE_IO, PCI_REGION_TYPE_MEM, PCI_REGION_TYPE_PREFMEM, + PCI_REGION_TYPE_PREFMEM64, PCI_REGION_TYPE_COUNT, };
static const char *region_type_name[] = { - [ PCI_REGION_TYPE_IO ] = "io", - [ PCI_REGION_TYPE_MEM ] = "mem", - [ PCI_REGION_TYPE_PREFMEM ] = "prefmem", + [ PCI_REGION_TYPE_IO ] = "io", + [ PCI_REGION_TYPE_MEM ] = "mem", + [ PCI_REGION_TYPE_PREFMEM ] = "prefmem", + [ PCI_REGION_TYPE_PREFMEM64 ] = "prefmem64", };
struct pci_bus { @@ -42,6 +44,7 @@ struct pci_bus { u64 bases[32 - PCI_MEM_INDEX_SHIFT]; u64 base; } r[PCI_REGION_TYPE_COUNT]; + int use_prefmem64; struct pci_device *bus_dev; };
@@ -65,13 +68,17 @@ static u32 pci_index_to_size(int index, enum pci_region_type type) return 0x1 << (index + shift); }
-static enum pci_region_type pci_addr_to_type(u32 addr) +static enum pci_region_type pci_addr_to_type(struct pci_bus *bus, struct pci_bar *bar) { - if (addr & PCI_BASE_ADDRESS_SPACE_IO) + if (!bar->ismem) return PCI_REGION_TYPE_IO; - if (addr & PCI_BASE_ADDRESS_MEM_PREFETCH) + if (!bar->isprefetch) + return PCI_REGION_TYPE_MEM; + if (!bar->is64) return PCI_REGION_TYPE_PREFMEM; - return PCI_REGION_TYPE_MEM; + if (bus->bus_dev != NULL && !bus->use_prefmem64) + return PCI_REGION_TYPE_PREFMEM; + return PCI_REGION_TYPE_PREFMEM64; }
static u32 pci_bar(struct pci_device *pci, int region_num) @@ -410,11 +417,16 @@ static void pci_bios_check_devices(struct pci_bus *busses) continue; bus = &busses[pci->secondary_bus]; bus->bus_dev = pci; + /* + * TODO: figure whenever the bridge can handle 64bit prefmem + * and set bus->use_prefmem64 if so. + */ }
// discover pci bars foreachpci(pci) { num_regions = (pci->class == PCI_CLASS_BRIDGE_PCI) ? 2 : PCI_NUM_REGIONS; + bus = &busses[pci_bdf_to_bus(pci->bdf)]; dprintf(1, "PCI: check device bdf=%02x:%02x.%x\n", pci_bdf_to_bus(pci->bdf), pci_bdf_to_dev(pci->bdf), pci_bdf_to_fn(pci->bdf)); @@ -423,6 +435,10 @@ static void pci_bios_check_devices(struct pci_bus *busses) if (pci->bars[i].addr == 0) continue;
+ if (pci->bars[i].ismem && pci->bars[i].isprefetch && !pci->bars[i].is64) { + bus->use_prefmem64 = 0; + } + if (pci->bars[i].is64) i++; } @@ -437,7 +453,7 @@ static void pci_bios_check_devices(struct pci_bus *busses) if (pci->bars[i].addr == 0) continue;
- type = pci_addr_to_type(pci->bars[i].addr); + type = pci_addr_to_type(bus, &pci->bars[i]); pci_bios_bus_reserve(bus, type, pci->bars[i].size);
@@ -457,28 +473,38 @@ static void pci_bios_check_devices(struct pci_bus *busses) for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { u64 limit = (type == PCI_REGION_TYPE_IO) ? PCI_BRIDGE_IO_MIN : PCI_BRIDGE_MEM_MIN; + if (s->r[type].sum == 0) + continue; s->r[type].size = s->r[type].sum; if (s->r[type].size < limit) s->r[type].size = limit; s->r[type].size = pci_size_roundup(s->r[type].size); pci_bios_bus_reserve(parent, type, s->r[type].size); } - dprintf(1, "PCI: secondary bus %d sizes: io %llx, mem %llx, prefmem %llx\n", + dprintf(1, "PCI: secondary bus %d sizes: io %llx, mem %llx, " + "prefmem %llx, prefmem64 %llx\n", secondary_bus, s->r[PCI_REGION_TYPE_IO].size, s->r[PCI_REGION_TYPE_MEM].size, - s->r[PCI_REGION_TYPE_PREFMEM].size); + s->r[PCI_REGION_TYPE_PREFMEM].size, + s->r[PCI_REGION_TYPE_PREFMEM64].size); } }
#define ROOT_BASE(top, sum, max) ALIGN_DOWN((top)-(sum),(max) ?: 1)
// Setup region bases (given the regions' size and alignment) -static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end) +static int pci_bios_init_root_regions(struct pci_bus *bus) { + u32 start = BUILD_PCIMEM_START; + u32 end = BUILD_PCIMEM_END; + u64 start64 = BUILD_PCIMEM64_START; + u64 end64 = BUILD_PCIMEM64_END; + bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
int reg1 = PCI_REGION_TYPE_PREFMEM, reg2 = PCI_REGION_TYPE_MEM; + int reg3 = PCI_REGION_TYPE_PREFMEM64; if (bus->r[reg1].sum < bus->r[reg2].sum) { // Swap regions so larger area is more likely to align well. reg1 = PCI_REGION_TYPE_MEM; @@ -490,6 +516,12 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end) if (bus->r[reg1].base < start) // Memory range requested is larger than available. return -1; + + bus->r[reg3].base = ROOT_BASE(end64, bus->r[reg3].sum, bus->r[reg3].max); + if (bus->r[reg3].base < start64) + // Memory range requested is larger than available. + return -1; + return 0; }
@@ -504,6 +536,8 @@ static void pci_bios_init_bus_bases(struct pci_bus *bus) int type, i;
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + if (bus->r[type].sum == 0) + continue; dprintf(1, " type %s max %llx sum %llx base %llx\n", region_type_name[type], bus->r[type].max, bus->r[type].sum, bus->r[type].base); base = bus->r[type].base; @@ -512,7 +546,7 @@ static void pci_bios_init_bus_bases(struct pci_bus *bus) if (!bus->r[type].count[i]) continue; newbase = base + size * bus->r[type].count[i]; - dprintf(1, " size %8llx: %d bar(s), %8llx -> %8llx\n", + dprintf(1, " size %llx: %d bar(s), %llx -> %llx\n", size, bus->r[type].count[i], base, newbase - 1); bus->r[type].bases[i] = base; base = newbase; @@ -520,9 +554,10 @@ static void pci_bios_init_bus_bases(struct pci_bus *bus) } }
-static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) +static u64 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u64 size) { - u32 index, addr; + u32 index; + u64 addr;
index = pci_size_to_index(size, type); addr = bus->r[type].bases[index]; @@ -550,30 +585,51 @@ static void pci_bios_map_devices(struct pci_bus *busses) struct pci_bus *parent = &busses[pci_bdf_to_bus(bdf)]; int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + if (s->r[type].sum == 0) + continue; s->r[type].base = pci_bios_bus_get_addr( parent, type, s->r[type].size); } dprintf(1, "PCI: init bases bus %d (secondary)\n", secondary_bus); pci_bios_init_bus_bases(s);
- u64 base = s->r[PCI_REGION_TYPE_IO].base; - u64 limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; + u64 base, limit; + if (s->r[PCI_REGION_TYPE_IO].sum) { + base = s->r[PCI_REGION_TYPE_IO].base; + limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; + } else { + base = PCI_BRIDGE_IO_MIN; + limit = 0; + } pci_config_writeb(bdf, PCI_IO_BASE, base >> PCI_IO_SHIFT); pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT); pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0);
- base = s->r[PCI_REGION_TYPE_MEM].base; - limit = base + s->r[PCI_REGION_TYPE_MEM].size - 1; + if (s->r[PCI_REGION_TYPE_MEM].sum) { + base = s->r[PCI_REGION_TYPE_MEM].base; + limit = base + s->r[PCI_REGION_TYPE_MEM].size - 1; + } else { + base = PCI_BRIDGE_MEM_MIN; + limit = 0; + } pci_config_writew(bdf, PCI_MEMORY_BASE, base >> PCI_MEMORY_SHIFT); pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT);
- base = s->r[PCI_REGION_TYPE_PREFMEM].base; - limit = base + s->r[PCI_REGION_TYPE_PREFMEM].size - 1; + if (s->r[PCI_REGION_TYPE_PREFMEM].sum) { + base = s->r[PCI_REGION_TYPE_PREFMEM].base; + limit = base + s->r[PCI_REGION_TYPE_PREFMEM].size - 1; + } else if (s->r[PCI_REGION_TYPE_PREFMEM64].sum) { + base = s->r[PCI_REGION_TYPE_PREFMEM64].base; + limit = base + s->r[PCI_REGION_TYPE_PREFMEM64].size - 1; + } else { + base = PCI_BRIDGE_MEM_MIN; + limit = 0; + } pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, base >> PCI_PREF_MEMORY_SHIFT); pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> PCI_PREF_MEMORY_SHIFT); - pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0); - pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0); + pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, base >> 32); + pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, base >> 32); }
// Map regions on each device. @@ -588,7 +644,7 @@ static void pci_bios_map_devices(struct pci_bus *busses) if (pci->bars[i].addr == 0) continue;
- int type = pci_addr_to_type(pci->bars[i].addr); + int type = pci_addr_to_type(bus, &pci->bars[i]); u64 addr = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); dprintf(1, " bar %d, addr %llx, size %llx [%s]\n", i, addr, pci->bars[i].size, region_type_name[type]); @@ -617,9 +673,6 @@ pci_setup(void)
dprintf(3, "pci setup\n");
- u32 start = BUILD_PCIMEM_START; - u32 end = BUILD_PCIMEM_END; - dprintf(1, "=== PCI bus & bridge init ===\n"); if (pci_probe_host() != 0) { return; @@ -637,7 +690,7 @@ pci_setup(void) } memset(busses, 0, sizeof(*busses) * (MaxPCIBus + 1)); pci_bios_check_devices(busses); - if (pci_bios_init_root_regions(&busses[0], start, end) != 0) { + if (pci_bios_init_root_regions(&busses[0]) != 0) { panic("PCI: out of address space\n"); }
On 02/16/12 18:21, Gerd Hoffmann wrote:
Hi,
The effort to add support for 64bit pci bars to qemu seems to have stalled somehow. Lets resume it ;)
Here is a patch collection, largely rewritten from scratch, which improves seabios 64bit support. One big patch (#3) adds 64bit math everythere. The other big patch (#5) adds support for ressource allocation above 4G. The remaining patches carry bugfixes and little bits such as support for printing 64bit hex numbers.
cheers, Gerd
ping?
cheers, Gerd
On Tue, Feb 21, 2012 at 09:25:58AM +0100, Gerd Hoffmann wrote:
On 02/16/12 18:21, Gerd Hoffmann wrote:
Hi,
The effort to add support for 64bit pci bars to qemu seems to have stalled somehow. Lets resume it ;)
Here is a patch collection, largely rewritten from scratch, which improves seabios 64bit support. One big patch (#3) adds 64bit math everythere. The other big patch (#5) adds support for ressource allocation above 4G. The remaining patches carry bugfixes and little bits such as support for printing 64bit hex numbers.
cheers, Gerd
ping?
I haven't really had a chance to review it. Initial thoughts were - on patch 1, I'm not sure how that will impact stack usage which is quite tight when running in 16bit mode; patch 5 - seems incomplete if it doesn't handle bridges properly. I didn't fully understand patch 5, but that's likely just due to lack of time to look at it.
BTW, what's the use case for 64bit PCI today?
-Kevin
Hi,
The effort to add support for 64bit pci bars to qemu seems to have stalled somehow. Lets resume it ;)
Here is a patch collection, largely rewritten from scratch, which improves seabios 64bit support. One big patch (#3) adds 64bit math everythere. The other big patch (#5) adds support for ressource allocation above 4G. The remaining patches carry bugfixes and little bits such as support for printing 64bit hex numbers.
cheers, Gerd
ping?
I haven't really had a chance to review it. Initial thoughts were - on patch 1, I'm not sure how that will impact stack usage which is quite tight when running in 16bit mode; patch 5 - seems incomplete if it doesn't handle bridges properly. I didn't fully understand patch 5, but that's likely just due to lack of time to look at it.
BTW, what's the use case for 64bit PCI today?
Hmm, at the moment I've almost completed testing of another implementation of 64bit BAR support. It's implemented in a very different way. I just need a day or so to form patches and write a description.
Who needs this today? At the moment qemu 1.0 does not work with 64bit BARs properly. I managed to investigate the issue with 64bit bars when they are allocated in the 32bit range. See the thread here: http://lists.gnu.org/archive/html/qemu-devel/2012-01/msg03189.html Plus there are some other issues in qemu which don't allow using 64bit BARs at the moment. I hope that all these issues are gone in the next release. At least Avi has promised to fix the main problem in qemu.
If 64bit BAR support is implemented we will be able to go ahead with a network monitoring card in qemu. So we need this feature. In addition big memory ranges (i.e. 64bit BARs) are very desirable for the qemu ivshmem driver (virtual pci device for inter-vm shared memory). And I'm pretty sure there will be other use cases.
Hi,
I haven't really had a chance to review it. Initial thoughts were
- on patch 1, I'm not sure how that will impact stack usage which
is quite tight when running in 16bit mode;
Stack usage doesn't grow much I think. struct pci_device becomes larger, but I doubt you can find those on the stack. Some local variables move from u32 to u64. But doesn't this code run in 32bit mode anyway?
patch 5 - seems incomplete if it doesn't handle bridges properly. I didn't fully understand patch 5, but that's likely just due to lack of time to look at it.
Yea, 64bit window detection isn't there, couldn't test that because qemu didn't support it, but mst posted a patch for that yesterday.
BTW, what's the use case for 64bit PCI today?
Hmm at the moment I've almost complete testing of another implementation of 64bit BAR support. It's implemeted in a very different way. Just need a day or so to form patches and write a description.
Oh, ok. I had the impression the effort had stalled due to complete silence for a bunch of weeks. /me looks forward to looking at your patches.
Who needs this today. At the moment qemu 1.0 does not work with 64bit BARs properly.
latest master works fine.
If 64bit BAR support is implemented we will be able to go ahead with network monitoring card in qemu. So we need this feature. In addition big memory ranges (i.e. 64bit BARs) are very desirable for qemu ivshmem driver (virtual pci device for inter-vm shared memory). And I'm pretty sure there will be other use cases.
Yea, this is pretty much about moving large memory bars out of the PCI address space window below 4G. ivshmem is an obvious candidate. I have patches for QXL too.
cheers, Gerd
On Wed, Feb 22, 2012 at 11:55:53AM +0100, Gerd Hoffmann wrote:
I haven't really had a chance to review it. Initial thoughts were
- on patch 1, I'm not sure how that will impact stack usage which
is quite tight when running in 16bit mode;
Stack usage doesn't grow much I think. struct pci_dev becomes larger, but I doubt you can find those on the stack. Some local variables move from u32 to u64. But doesn't run this code in 32bit anyway?
I was referring to the changes to dprintf, which does run in 16bit mode. There is a script tools/checkstack.py that can calculate the 16bit stack usages.
-Kevin
On 02/22/12 14:38, Kevin O'Connor wrote:
On Wed, Feb 22, 2012 at 11:55:53AM +0100, Gerd Hoffmann wrote:
I haven't really had a chance to review it. Initial thoughts were
- on patch 1, I'm not sure how that will impact stack usage which
is quite tight when running in 16bit mode;
Stack usage doesn't grow much I think. struct pci_dev becomes larger, but I doubt you can find those on the stack. Some local variables move from u32 to u64. But doesn't run this code in 32bit anyway?
I was referring to the changes to dprintf, which does run in 16bit mode. There is a script tools/checkstack.py that can calculate the 16bit stack usages.
Ah. That patch raises the stack usage indeed, even without printing 64bit hex numbers ...
cheers, Gerd