The series is developed together with QEMU's:
  [Qemu-devel] [PATCH RFC 00/17] implement multiple primary busses for pc machines
  http://lists.gnu.org/archive/html/qemu-ppc/2015-01/msg00159.html
(archived on qemu-ppc, but not related to ppc)

v1 -> v2:
 - Addressed Kevin O'Connor's idea (thanks!) to treat devices behind extra
   root PCI buses as belonging to bus 0 for resource sizing and mapping.

The series fixes some issues that appear when more than one primary root bus is present.

The first patch scans the whole bus range to find the extra root buses.
The second patch extends memory and IO mapping to the buses it finds.
Marcel Apfelbaum (2):
  fw/pci: scan all buses if extraroots romfile is present
  fw/pci: map memory and IO regions for multiple pci root buses

 src/fw/pciinit.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
From: Marcel Apfelbaum <marcel.a@redhat.com>
If there are extra primary root buses, scanning bus 0's subtree is not enough. Scan the whole bus range.
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 src/fw/pciinit.c | 9 +++++++++
 1 file changed, 9 insertions(+)
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index fd5dfb9..a5f6505 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -499,8 +499,17 @@ pci_bios_init_bus_rec(int bus, u8 *pci_bus)
 static void
 pci_bios_init_bus(void)
 {
+    u8 extraroots = romfile_loadint("etc/extra-pci-roots", 0);
     u8 pci_bus = 0;
+
     pci_bios_init_bus_rec(0 /* host bus */, &pci_bus);
+
+    if (extraroots) {
+        while (pci_bus < 0xff) {
+            pci_bus++;
+            pci_bios_init_bus_rec(pci_bus, &pci_bus);
+        }
+    }
 }
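To make the effect of that loop concrete, here is a small standalone sketch, not SeaBIOS code: visit_bus() is a hypothetical stand-in for pci_bios_init_bus_rec(), which scans one bus and advances the shared counter past the highest bus number handled so far. Once the bus 0 subtree is done, walking the counter over the rest of the 0..0xff range also reaches root buses that no bridge on bus 0 leads to.

/* Toy model of the scan above; names are hypothetical, not SeaBIOS code. */
#include <stdio.h>

#define LAST_BUS 0xff

static int scanned[LAST_BUS + 1];

/* Stand-in for pci_bios_init_bus_rec(): "scan" one bus and remember
 * the highest bus number handled so far in *last_bus. */
static void visit_bus(int bus, int *last_bus)
{
    scanned[bus] = 1;
    if (bus > *last_bus)
        *last_bus = bus;
    /* The real code would program bridge secondary/subordinate numbers
     * here and recurse into the child buses it creates. */
}

int main(void)
{
    int extraroots = 1;   /* stands in for romfile_loadint("etc/extra-pci-roots", 0) */
    int pci_bus = 0;

    visit_bus(0, &pci_bus);               /* the usual bus 0 subtree scan */

    if (extraroots) {
        /* Walk every remaining bus number so extra root buses are reached too. */
        while (pci_bus < LAST_BUS) {
            pci_bus++;
            visit_bus(pci_bus, &pci_bus);
        }
    }

    printf("buses scanned: %s\n", scanned[LAST_BUS] ? "0..0xff" : "0 only");
    return 0;
}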
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 src/fw/pciinit.c | 11 +++++++++++
 1 file changed, 11 insertions(+)
diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index a5f6505..c399698 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -58,6 +58,7 @@ struct pci_region {
 struct pci_bus {
     struct pci_region r[PCI_REGION_TYPE_COUNT];
     struct pci_device *bus_dev;
+    int res_on_default_bus;
 };
 
 static u32 pci_bar(struct pci_device *pci, int region_num)
@@ -686,6 +687,14 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             busses[pci->secondary_bus].bus_dev = pci;
 
         struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
+        if (!bus->bus_dev && pci_bdf_to_bus(pci->bdf)) {
+            bus->res_on_default_bus = 1;
+            /*
+             * Treat devices on extra root buses as if they
+             * were on the default root bus (bus 0)
+             */
+            bus = &busses[0];
+        }
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -716,6 +725,8 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (!s->bus_dev)
             continue;
         struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
+        if (parent->res_on_default_bus)
+            parent = &busses[0];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
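The accounting rule the patch adds can be seen in isolation with a toy model, not SeaBIOS code and with hypothetical names: a bus that has no bridge device recorded for it and is not bus 0 is an extra root bus, so it raises a flag and its devices' BAR sizes are summed into bus 0's totals instead of its own.

/* Toy model of the v2 accounting rule; hypothetical names, not SeaBIOS code. */
#include <stdio.h>

#define NBUS 256

struct bus {
    int has_bridge;         /* stands in for bus_dev != NULL             */
    int on_default_bus;     /* stands in for the res_on_default_bus flag */
    unsigned long mem_sum;  /* stands in for the pci_region totals       */
};

static struct bus busses[NBUS];

static void account_bar(int busnr, unsigned long size)
{
    struct bus *bus = &busses[busnr];
    if (!bus->has_bridge && busnr != 0) {
        bus->on_default_bus = 1;   /* remember: its resources live on bus 0 */
        bus = &busses[0];
    }
    bus->mem_sum += size;
}

int main(void)
{
    busses[1].has_bridge = 1;  /* bus 1 sits behind a bridge; bus 2 is an extra root */

    account_bar(0, 0x1000);    /* device on the default root bus          */
    account_bar(2, 0x2000);    /* device on the extra root bus -> bus 0   */
    account_bar(1, 0x4000);    /* device behind the bridge stays on bus 1 */

    printf("bus0=%#lx bus1=%#lx bus2=%#lx\n",
           busses[0].mem_sum, busses[1].mem_sum, busses[2].mem_sum);
    /* expected: bus0=0x3000 bus1=0x4000 bus2=0 */
    return 0;
}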
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
-Kevin
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 src/fw/pciinit.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index a5f6505..c399698 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -58,6 +58,7 @@ struct pci_region {
 struct pci_bus {
     struct pci_region r[PCI_REGION_TYPE_COUNT];
     struct pci_device *bus_dev;
+    int res_on_default_bus;
 };
 
 static u32 pci_bar(struct pci_device *pci, int region_num)
@@ -686,6 +687,14 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             busses[pci->secondary_bus].bus_dev = pci;
 
         struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
+        if (!bus->bus_dev && pci_bdf_to_bus(pci->bdf)) {
+            bus->res_on_default_bus = 1;
+            /*
+             * Treat devices on extra root buses as if they
+             * were on the default root bus (bus 0)
+             */
+            bus = &busses[0];
+        }
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -716,6 +725,8 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (!s->bus_dev)
             continue;
         struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
+        if (parent->res_on_default_bus)
+            parent = &busses[0];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
-- 2.1.0
On 02/16/2015 04:55 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Hi Kevin, Thank you for the review.
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
I did need them continuous for the previous implementation. I have since modified the QEMU code to be generic, allowing the mem/IO regions of a PCI root bus to be non-continuous. ACPI and the operating systems do not require continuity, so the correct approach is the one that achieves better resource utilization.
Thanks, Marcel
-Kevin
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 src/fw/pciinit.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index a5f6505..c399698 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -58,6 +58,7 @@ struct pci_region {
 struct pci_bus {
     struct pci_region r[PCI_REGION_TYPE_COUNT];
     struct pci_device *bus_dev;
+    int res_on_default_bus;
 };
 
 static u32 pci_bar(struct pci_device *pci, int region_num)
@@ -686,6 +687,14 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             busses[pci->secondary_bus].bus_dev = pci;
 
         struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
+        if (!bus->bus_dev && pci_bdf_to_bus(pci->bdf)) {
+            bus->res_on_default_bus = 1;
+            /*
+             * Treat devices on extra root buses as if they
+             * were on the default root bus (bus 0)
+             */
+            bus = &busses[0];
+        }
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -716,6 +725,8 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (!s->bus_dev)
             continue;
         struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
+        if (parent->res_on_default_bus)
+            parent = &busses[0];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
-- 2.1.0
On Mon, Feb 16, 2015 at 05:43:31PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 04:55 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Hi Kevin, Thank you for the review.
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
I did need them continuous for the previous implementation. I have since modified the QEMU code to be generic, allowing the mem/IO regions of a PCI root bus to be non-continuous. ACPI and the operating systems do not require continuity, so the correct approach is the one that achieves better resource utilization.
Okay, thanks.
BTW, do you need the res_on_default_bus flag? Would the below work (totally untested)?
-Kevin
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -668,6 +668,9 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             busses[pci->secondary_bus].bus_dev = pci;
 
         struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
+        if (!bus->bus_dev)
+            // Resources for all root busses go in busses[0]
+            bus = &busses[0];
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -698,6 +701,9 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (!s->bus_dev)
             continue;
         struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
+        if (!parent->bus_dev)
+            // Resources for all root busses go in busses[0]
+            parent = &busses[0];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
On 02/16/2015 06:07 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 05:43:31PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 04:55 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Hi Kevin, Thank you for the review.
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
I did need them continuous for the previous implementation. I have since modified the QEMU code to be generic, allowing the mem/IO regions of a PCI root bus to be non-continuous. ACPI and the operating systems do not require continuity, so the correct approach is the one that achieves better resource utilization.
Okay, thanks.
BTW, do you need the res_on_default_bus flag? Would the below work (totally untested)?
I tried that already :) It will not work if you have a pci-bridge behind a pci-bridge (and so on...)
Thanks, Marcel
-Kevin
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -668,6 +668,9 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             busses[pci->secondary_bus].bus_dev = pci;
 
         struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
+        if (!bus->bus_dev)
+            // Resources for all root busses go in busses[0]
+            bus = &busses[0];
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -698,6 +701,9 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (!s->bus_dev)
             continue;
         struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
+        if (!parent->bus_dev)
+            // Resources for all root busses go in busses[0]
+            parent = &busses[0];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
On Mon, Feb 16, 2015 at 06:14:57PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 06:07 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 05:43:31PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 04:55 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Hi Kevin, Thank you for the review.
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
I did need them continuous for the previous implementation. I have since modified the QEMU code to be generic, allowing the mem/IO regions of a PCI root bus to be non-continuous. ACPI and the operating systems do not require continuity, so the correct approach is the one that achieves better resource utilization.
Okay, thanks.
BTW, do you need the res_on_default_bus flag? Would the below work (totally untested)?
I tried that already :) It will not work if you have a pci-bridge behind a pci-bridge (and so on...)
Okay - I'm missing something then. The only place bus->bus_dev==NULL is for root busses (busses[0] or extra root busses), and the only place bus->res_on_default_bus is set is when bus->bus_dev==NULL. So, why can't bus->bus_dev==NULL be tested instead of bus->res_on_default_bus?
If it can be done, the patch below is likely better (totally untested).
-Kevin
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -601,7 +601,7 @@ static void pci_region_migrate_64bit_entries(struct pci_region *from,
 }
 
 static struct pci_region_entry *
-pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev,
+pci_region_create_entry(struct pci_bus *busses, struct pci_device *dev,
                         int bar, u64 size, u64 align, int type, int is64)
 {
     struct pci_region_entry *entry = malloc_tmp(sizeof(*entry));
@@ -617,6 +617,10 @@ pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev,
     entry->is64 = is64;
     entry->type = type;
     // Insert into list in sorted order.
+    struct pci_bus *bus = &busses[pci_bdf_to_bus(dev->bdf)];
+    if (!bus->bus_dev)
+        // Resources for all root busses go in busses[0]
+        bus = &busses[0];
     struct hlist_node **pprev;
     struct pci_region_entry *pos;
     hlist_for_each_entry_pprev(pos, pprev, &bus->r[type].list, node) {
@@ -667,7 +671,6 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (pci->class == PCI_CLASS_BRIDGE_PCI)
             busses[pci->secondary_bus].bus_dev = pci;
 
-        struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -682,7 +685,7 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             if (type != PCI_REGION_TYPE_IO && size < PCI_DEVICE_MEM_MIN)
                 size = PCI_DEVICE_MEM_MIN;
             struct pci_region_entry *entry = pci_region_create_entry(
-                bus, pci, i, size, size, type, is64);
+                busses, pci, i, size, size, type, is64);
             if (!entry)
                 return -1;
@@ -697,7 +700,6 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         struct pci_bus *s = &busses[secondary_bus];
         if (!s->bus_dev)
             continue;
-        struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
@@ -715,7 +717,7 @@
                     s->bus_dev, type);
             // entry->bar is -1 if the entry represents a bridge region
             struct pci_region_entry *entry = pci_region_create_entry(
-                parent, s->bus_dev, -1, size, align, type, is64);
+                busses, s->bus_dev, -1, size, align, type, is64);
             if (!entry)
                 return -1;
             dprintf(1, "PCI: secondary bus %d size %08llx type %s\n",
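Kevin's point can be checked against the same toy model used earlier (hypothetical names, not SeaBIOS code): once bridge devices are recorded before the devices behind them are accounted, the "no bridge device for this bus" test alone identifies root buses, default or extra, and the separate flag adds nothing.

/* Variant of the earlier toy model without the flag; hypothetical names. */
#include <stdio.h>

#define NBUS 256

struct bus {
    int has_bridge;         /* stands in for bus_dev != NULL       */
    unsigned long mem_sum;  /* stands in for the pci_region totals */
};

static struct bus busses[NBUS];

static void account_bar(int busnr, unsigned long size)
{
    struct bus *bus = &busses[busnr];
    if (!bus->has_bridge)      /* root bus, whether default or extra */
        bus = &busses[0];      /* its resources are pooled on bus 0  */
    bus->mem_sum += size;
}

int main(void)
{
    /* bridge on bus 0 -> bus 1, nested bridge on bus 1 -> bus 2, extra root at bus 3 */
    busses[1].has_bridge = 1;
    busses[2].has_bridge = 1;

    account_bar(2, 0x1000);    /* device behind the nested bridge: stays on bus 2 */
    account_bar(3, 0x2000);    /* device on the extra root bus: folded into bus 0 */

    printf("bus0=%#lx bus2=%#lx bus3=%#lx\n",
           busses[0].mem_sum, busses[2].mem_sum, busses[3].mem_sum);
    /* expected: bus0=0x2000 bus2=0x1000 bus3=0 */
    return 0;
}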
On 02/16/2015 06:36 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 06:14:57PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 06:07 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 05:43:31PM +0200, Marcel Apfelbaum wrote:
On 02/16/2015 04:55 PM, Kevin O'Connor wrote:
On Mon, Feb 16, 2015 at 11:31:03AM +0200, Marcel Apfelbaum wrote:
For resource sizing and mapping purposes treat devices on extra root buses as if they are on the default root bus (bus 0).
Hi Kevin, Thank you for the review.
Remind me - did you need the resources for each extra root bus to be continuous (ie, not interspersed with resources allocated to devices on the root bus)? If I read your patch correctly, it won't segregate resources on the extra buses.
I did need them continuous for the previous implementation. I have since modified the QEMU code to be generic, allowing the mem/IO regions of a PCI root bus to be non-continuous. ACPI and the operating systems do not require continuity, so the correct approach is the one that achieves better resource utilization.
Okay, thanks.
BTW, do you need the res_on_default_bus flag? Would the below work (totally untested)?
I tried that already :) It will not work if you have a pci-bridge behind a pci-bridge (and so on...)
Okay - I'm missing something then. The only place bus->bus_dev==NULL is for root busses (busses[0] or extra root busses), and the only place bus->res_on_default_bus is set is when bus->bus_dev==NULL. So, why can't bus->bus_dev==NULL be tested instead of bus->res_on_default_bus?
You are right! As I previously said, I tried it myself and failed; I might have messed something up when I tested it.
The previous patch also works OK, and I prefer it because it is simpler. I'll send a new version shortly.
Thanks, Marcel
If it can be done, the patch below is likely better (totally untested).
-Kevin
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -601,7 +601,7 @@ static void pci_region_migrate_64bit_entries(struct pci_region *from,
 }
 
 static struct pci_region_entry *
-pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev,
+pci_region_create_entry(struct pci_bus *busses, struct pci_device *dev,
                         int bar, u64 size, u64 align, int type, int is64)
 {
     struct pci_region_entry *entry = malloc_tmp(sizeof(*entry));
@@ -617,6 +617,10 @@ pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev,
     entry->is64 = is64;
     entry->type = type;
     // Insert into list in sorted order.
+    struct pci_bus *bus = &busses[pci_bdf_to_bus(dev->bdf)];
+    if (!bus->bus_dev)
+        // Resources for all root busses go in busses[0]
+        bus = &busses[0];
     struct hlist_node **pprev;
     struct pci_region_entry *pos;
     hlist_for_each_entry_pprev(pos, pprev, &bus->r[type].list, node) {
@@ -667,7 +671,6 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         if (pci->class == PCI_CLASS_BRIDGE_PCI)
             busses[pci->secondary_bus].bus_dev = pci;
 
-        struct pci_bus *bus = &busses[pci_bdf_to_bus(pci->bdf)];
         int i;
         for (i = 0; i < PCI_NUM_REGIONS; i++) {
             if ((pci->class == PCI_CLASS_BRIDGE_PCI) &&
@@ -682,7 +685,7 @@ static int pci_bios_check_devices(struct pci_bus *busses)
             if (type != PCI_REGION_TYPE_IO && size < PCI_DEVICE_MEM_MIN)
                 size = PCI_DEVICE_MEM_MIN;
             struct pci_region_entry *entry = pci_region_create_entry(
-                bus, pci, i, size, size, type, is64);
+                busses, pci, i, size, size, type, is64);
             if (!entry)
                 return -1;
@@ -697,7 +700,6 @@ static int pci_bios_check_devices(struct pci_bus *busses)
         struct pci_bus *s = &busses[secondary_bus];
         if (!s->bus_dev)
             continue;
-        struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)];
         int type;
         int hotplug_support = pci_bus_hotplug_support(s);
         for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
@@ -715,7 +717,7 @@
                     s->bus_dev, type);
             // entry->bar is -1 if the entry represents a bridge region
             struct pci_region_entry *entry = pci_region_create_entry(
-                parent, s->bus_dev, -1, size, align, type, is64);
+                busses, s->bus_dev, -1, size, align, type, is64);
             if (!entry)
                 return -1;
             dprintf(1, "PCI: secondary bus %d size %08llx type %s\n",
Hi,
The series is developed together with QEMU's:
  [Qemu-devel] [PATCH RFC 00/17] implement multiple primary busses for pc machines
  http://lists.gnu.org/archive/html/qemu-ppc/2015-01/msg00159.html
(archived on qemu-ppc, but not related to ppc)

v1 -> v2:
 - Addressed Kevin O'Connor's idea (thanks!) to treat devices behind extra
   root PCI buses as belonging to bus 0 for resource sizing and mapping.

The series fixes some issues that appear when more than one primary root bus is present.

The first patch scans the whole bus range to find the extra root buses.
The second patch extends memory and IO mapping to the buses it finds.
Committed to master now. The series is still under review on the QEMU side, but it is clear that there are no general objections and that the fw_cfg interface will not change either. Details of the AML generation are still being hashed out.
cheers, Gerd
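For reference, a minimal sketch of the firmware side of the fw_cfg contract mentioned above, under the assumption (to be confirmed against the QEMU series) that "etc/extra-pci-roots" carries a little-endian integer of up to 8 bytes, which patch 1 reads with romfile_loadint() and defaults to 0 when the file is absent. The helper below only models that decode; it is neither QEMU nor SeaBIOS code.

/* Model of decoding the "etc/extra-pci-roots" blob; not QEMU or SeaBIOS code. */
#include <stdint.h>
#include <stdio.h>

static uint64_t loadint_le(const uint8_t *blob, size_t len)
{
    uint64_t val = 0;
    size_t i;
    /* Assemble byte by byte so the sketch is endian-independent. */
    for (i = 0; i < len && i < sizeof(val); i++)
        val |= (uint64_t)blob[i] << (8 * i);
    return val;
}

int main(void)
{
    uint8_t blob[8] = { 2 };   /* "2 extra root buses", little-endian */
    printf("extra-pci-roots = %llu\n",
           (unsigned long long)loadint_le(blob, sizeof(blob)));
    return 0;
}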