Hi,
Next round of the two-pass initialization patches, addressing the review comments from Kevin.
cheers, Gerd
Gerd Hoffmann (6):
  pci: add two-pass pci initialization code
  pci: activate two-pass pci initialization code
  pci: remove old pci initialization code
  pci: init boot devices only on address space shortage
  pci: cleanup config.h
  pci: set BUILD_PCIMEM_START to 0xe0000000
Makefile | 2 +- src/config.h | 16 +-- src/pci.c | 3 + src/pci.h | 8 + src/pci_region.c | 77 -------- src/pciinit.c | 537 ++++++++++++++++++++++++++++++++++++------------------ src/util.h | 31 +--- 7 files changed, 377 insertions(+), 297 deletions(-) delete mode 100644 src/pci_region.c
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
To make debugging easier, this patch does not yet map the devices; it only calculates and prints the layout.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pci.h | 8 ++ src/pciinit.c | 312 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 317 insertions(+), 3 deletions(-)
diff --git a/src/pci.h b/src/pci.h index a21a1fd..e9e191a 100644 --- a/src/pci.h +++ b/src/pci.h @@ -3,6 +3,9 @@
#include "types.h" // u32
+#define PCI_ROM_SLOT 6 +#define PCI_NUM_REGIONS 7 + static inline u8 pci_bdf_to_bus(u16 bdf) { return bdf >> 8; } @@ -48,6 +51,11 @@ struct pci_device { u8 prog_if, revision; u8 header_type; u8 secondary_bus; + struct { + u32 addr; + u32 size; + int is64; + } bars[PCI_NUM_REGIONS];
// Local information on device. int have_driver; diff --git a/src/pciinit.c b/src/pciinit.c index bfff3db..b0a712d 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,15 +12,83 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_ROM_SLOT 6 -#define PCI_NUM_REGIONS 7 +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+enum pci_region_type { + PCI_REGION_TYPE_IO, + PCI_REGION_TYPE_MEM, + PCI_REGION_TYPE_PREFMEM, + PCI_REGION_TYPE_COUNT, +}; + +static const char *region_type_name[] = { + [ PCI_REGION_TYPE_IO ] = "io", + [ PCI_REGION_TYPE_MEM ] = "mem", + [ PCI_REGION_TYPE_PREFMEM ] = "prefmem", +}; + +static struct pci_bus { + struct { + /* pci region stats */ + u32 count[32 - PCI_MEM_INDEX_SHIFT]; + u32 sum, max; + /* seconday bus region sizes */ + u32 size; + /* pci region assignments */ + u32 bases[32 - PCI_MEM_INDEX_SHIFT]; + u32 base; + } r[PCI_REGION_TYPE_COUNT]; +} *busses; +static int busses_count; + +static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus); + +static int pci_size_to_index(u32 size, enum pci_region_type type) +{ + int index = __fls(size); + int shift = (type == PCI_REGION_TYPE_IO) ? + PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; + + if (index < shift) + index = shift; + index -= shift; + return index; +} + +static u32 pci_index_to_size(int index, enum pci_region_type type) +{ + int shift = (type == PCI_REGION_TYPE_IO) ? + PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; + + return 1 << (index + shift); +} + +static enum pci_region_type pci_addr_to_type(u32 addr) +{ + if (addr & PCI_BASE_ADDRESS_SPACE_IO) + return PCI_REGION_TYPE_IO; + if (addr & PCI_BASE_ADDRESS_MEM_PREFETCH) + return PCI_REGION_TYPE_PREFMEM; + return PCI_REGION_TYPE_MEM; +} + +static u32 pci_size_roundup(u32 size) +{ + int index = __fls(size); + return 1 << index; +} + /* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -442,6 +510,221 @@ pci_bios_init_bus(void) pci_bios_init_bus_rec(0 /* host bus */, &pci_bus); }
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar, + u32 *val, u32 *size) +{ + u32 ofs = pci_bar(bdf, bar); + u32 old = pci_config_readl(bdf, ofs); + u32 mask; + + if (bar == PCI_ROM_SLOT) { + mask = PCI_ROM_ADDRESS_MASK; + pci_config_writel(bdf, ofs, mask); + } else { + if (old & PCI_BASE_ADDRESS_SPACE_IO) + mask = PCI_BASE_ADDRESS_IO_MASK; + else + mask = PCI_BASE_ADDRESS_MEM_MASK; + pci_config_writel(bdf, ofs, ~0); + } + *val = pci_config_readl(bdf, ofs); + pci_config_writel(bdf, ofs, old); + *size = (~(*val & mask)) + 1; +} + +static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) +{ + u32 index; + + index = pci_size_to_index(size, type); + size = pci_index_to_size(index, type); + bus->r[type].count[index]++; + bus->r[type].sum += size; + if (bus->r[type].max < size) + bus->r[type].max = size; +} + +static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) +{ + u32 index, addr; + + index = pci_size_to_index(size, type); + addr = bus->r[type].bases[index]; + bus->r[type].bases[index] += pci_index_to_size(index, type); + return addr; +} + +static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev) +{ + u16 bdf = dev->bdf; + u32 limit; + int i,type; + + if (dev->class == PCI_CLASS_BRIDGE_PCI) { + if (dev->secondary_bus >= busses_count) { + /* should never trigger */ + dprintf(1, "PCI: bus count too small (%d), skipping bus #%d\n", + busses_count, dev->secondary_bus); + return; + } + struct pci_bus *s = busses + dev->secondary_bus; + pci_bios_check_device_in_bus(dev->secondary_bus); + for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + s->r[type].size = pci_size_roundup(s->r[type].sum); + limit = (type == PCI_REGION_TYPE_IO) ? 
+ PCI_BRIDGE_IO_MIN : PCI_BRIDGE_MEM_MIN; + if (s->r[type].size < limit) + s->r[type].size = limit; + pci_bios_bus_reserve(bus, type, s->r[type].size); + } + dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", + dev->secondary_bus, + s->r[PCI_REGION_TYPE_IO].size, + s->r[PCI_REGION_TYPE_MEM].size, + s->r[PCI_REGION_TYPE_PREFMEM].size); + return; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + u32 val, size; + pci_bios_bus_get_bar(bus, bdf, i, &val, &size); + if (val == 0) { + continue; + } + pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); + dev->bars[i].addr = val; + dev->bars[i].size = size; + dev->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && + (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64); + + if (dev->bars[i].is64) { + i++; + } + } +} + +static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) +{ + int type, i; + + if (dev->class == PCI_CLASS_BRIDGE_PCI) { + if (dev->secondary_bus >= busses_count) { + return; + } + struct pci_bus *s = busses + dev->secondary_bus; + + for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + s->r[type].base = pci_bios_bus_get_addr(bus, type, s->r[type].size); + } + dprintf(1, "PCI: init bases bus %d (secondary)\n", dev->secondary_bus); + pci_bios_init_bus_bases(s); + /* TODO: commit assignments */ + + pci_bios_map_device_in_bus(dev->secondary_bus); + return; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + u32 addr; + if (dev->bars[i].addr == 0) { + continue; + } + + addr = pci_bios_bus_get_addr(bus, pci_addr_to_type(dev->bars[i].addr), + dev->bars[i].size); + dprintf(1, " bar %d, addr %x, size %x [%s]\n", + i, addr, dev->bars[i].size, + dev->bars[i].addr & PCI_BASE_ADDRESS_SPACE_IO ? 
"io" : "mem"); + /* TODO: commit assignments */ + + if (dev->bars[i].is64) { + i++; + } + } +} + +static void pci_bios_check_device_in_bus(int bus) +{ + struct pci_device *pci; + + dprintf(1, "PCI: check devices bus %d\n", bus); + foreachpci(pci) { + if (pci_bdf_to_bus(pci->bdf) != bus) + continue; + pci_bios_check_device(&busses[bus], pci); + } +} + +static void pci_bios_map_device_in_bus(int bus) +{ + struct pci_device *pci; + + foreachpci(pci) { + if (pci_bdf_to_bus(pci->bdf) != bus) + continue; + dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, pci->bdf); + pci_bios_map_device(&busses[bus], pci); + } +} + +static void pci_bios_init_bus_bases(struct pci_bus *bus) +{ + u32 base, newbase, size; + int type, i; + + for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type], + bus->r[type].max, bus->r[type].sum, bus->r[type].base); + base = bus->r[type].base; + for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) { + size = pci_index_to_size(i, type); + if (!bus->r[type].count[i]) + continue; + newbase = base + size * bus->r[type].count[i]; + dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", + size, bus->r[type].count[i], base, newbase - 1); + bus->r[type].bases[i] = base; + base = newbase; + } + } +} + +#define ROOT_BASE(top, sum, align) ALIGN_DOWN((top)-(sum),(align)) + +static int pci_bios_init_root_regions(u32 start, u32 end) +{ + struct pci_bus *bus = &busses[0]; + + bus->r[PCI_REGION_TYPE_IO].base = 0xc000; + + if (bus->r[PCI_REGION_TYPE_MEM].sum < bus->r[PCI_REGION_TYPE_PREFMEM].sum) { + bus->r[PCI_REGION_TYPE_MEM].base = + ROOT_BASE(end, + bus->r[PCI_REGION_TYPE_MEM].sum, + bus->r[PCI_REGION_TYPE_MEM].max); + bus->r[PCI_REGION_TYPE_PREFMEM].base = + ROOT_BASE(bus->r[PCI_REGION_TYPE_MEM].base, + bus->r[PCI_REGION_TYPE_PREFMEM].sum, + bus->r[PCI_REGION_TYPE_PREFMEM].max); + if (bus->r[PCI_REGION_TYPE_PREFMEM].base >= start) { + return 0; + } + } else { + 
bus->r[PCI_REGION_TYPE_PREFMEM].base = + ROOT_BASE(end, + bus->r[PCI_REGION_TYPE_PREFMEM].sum, + bus->r[PCI_REGION_TYPE_PREFMEM].max); + bus->r[PCI_REGION_TYPE_MEM].base = + ROOT_BASE(bus->r[PCI_REGION_TYPE_PREFMEM].base, + bus->r[PCI_REGION_TYPE_MEM].sum, + bus->r[PCI_REGION_TYPE_MEM].max); + if (bus->r[PCI_REGION_TYPE_MEM].base >= start) { + return 0; + } + } + return -1; +} + void pci_setup(void) { @@ -453,19 +736,42 @@ pci_setup(void)
dprintf(3, "pci setup\n");
+ u32 start = BUILD_PCIMEM_START; + u32 end = BUILD_IOAPIC_ADDR; + pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); pci_region_init(&pci_bios_mem_region, BUILD_PCIMEM_START, BUILD_PCIMEM_END - 1); pci_region_init(&pci_bios_prefmem_region, BUILD_PCIPREFMEM_START, BUILD_PCIPREFMEM_END - 1);
+ dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
+ dprintf(1, "=== PCI device probing ===\n"); pci_probe();
+ dprintf(1, "=== PCI new allocation pass #1 ===\n"); + busses_count = MaxPCIBus + 1; + busses = malloc_tmp(sizeof(*busses) * busses_count); + pci_bios_check_device_in_bus(0 /* host bus */); + if (pci_bios_init_root_regions(start, end) != 0) { + dprintf(1, "PCI: out of address space\n"); + /* Hmm, what do now? */ + } + + dprintf(1, "=== PCI new allocation pass #2 ===\n"); + dprintf(1, "PCI: init bases bus 0 (primary)\n"); + pci_bios_init_bus_bases(&busses[0]); + pci_bios_map_device_in_bus(0 /* host bus */); + + dprintf(1, "=== PCI old allocation pass ===\n"); struct pci_device *pci; foreachpci(pci) { pci_init_device(pci_isa_bridge_tbl, pci, NULL); } pci_bios_init_device_in_bus(0 /* host bus */); + + free(busses); + busses_count = 0; }
On Tue, Jul 05, 2011 at 05:27:00PM +0200, Gerd Hoffmann wrote:
diff --git a/src/pciinit.c b/src/pciinit.c index bfff3db..b0a712d 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,15 +12,83 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_ROM_SLOT 6 -#define PCI_NUM_REGIONS 7 +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_BRIDGE_IO_MIN 0x100
This seems a little low: virtio block devices need, I think, 64 bytes at the moment. To have 32 of these behind a bridge we'll need to allocate 2K of I/O space per bridge. This means we can have at most 31 such bridges, but that seems a reasonable limitation to me.
On 07/06/11 10:54, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:00PM +0200, Gerd Hoffmann wrote:
diff --git a/src/pciinit.c b/src/pciinit.c index bfff3db..b0a712d 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,15 +12,83 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_ROM_SLOT 6 -#define PCI_NUM_REGIONS 7 +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_BRIDGE_IO_MIN 0x100
This seems a little low: virtio block devices need, I think, 64 bytes at the moment. To have 32 of these behind a bridge we'll need to allocate 2K of I/O space per bridge. This means we can have at most 31 such bridges, but that seems a reasonable limitation to me.
Guess we'll need to handle different bridges in different ways then, depending on the number of devices which could be hooked up there. A PCI Express port with a single device behind it certainly needs fewer resources than a bridge which can hook up 32 devices ...
With the q35 machine I see a bunch of different pci express bridges:
Express Root Port Express Upstream Port Express Downstream Port
and classic pci bridges (DECchip 21154).
Guess to identify the pci express devices we'll have to look at the capabilities? Suggestions how to classify all these?
cheers, Gerd
On Wed, Jul 06, 2011 at 12:28:00PM +0200, Gerd Hoffmann wrote:
On 07/06/11 10:54, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:00PM +0200, Gerd Hoffmann wrote:
diff --git a/src/pciinit.c b/src/pciinit.c index bfff3db..b0a712d 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,15 +12,83 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_ROM_SLOT 6 -#define PCI_NUM_REGIONS 7 +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_BRIDGE_IO_MIN 0x100
This seems a little low: virtio block devices need, I think, 64 bytes at the moment. To have 32 of these behind a bridge we'll need to allocate 2K of I/O space per bridge. This means we can have at most 31 such bridges, but that seems a reasonable limitation to me.
Guess we'll need to handle different bridges in different ways then, depending on the number of devices which could be hooked up there. A PCI Express port with a single device behind it certainly needs fewer resources than a bridge which can hook up 32 devices ...
With the q35 machine I see a bunch of different pci express bridges:
Express Root Port Express Upstream Port Express Downstream Port
and classic pci bridges (DECchip 21154).
Guess to identify the pci express devices we'll have to look at the capabilities?
Yes.
Suggestions how to classify all these?
Section 7.8.2 (PCI Express Capabilities Register) of the PCI Express specification describes the Device/Port Type field. See section 1.3 (PCI Express Fabric Topology) to understand what each device type does.
cheers, Gerd
BTW, PCI bridges have slot numbering capabilities for extension slots. I'm still trying to figure out how hotplug works there but, if present, that gives an upper limit on the number of slots. It's not present in system expansion slots though, and we didn't implement it in qemu at all.
On Tue, Jul 05, 2011 at 05:27:00PM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
To make debugging easier, this patch does not yet map the devices; it only calculates and prints the layout.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
A couple of minor nits and style suggestions below.
src/pci.h | 8 ++ src/pciinit.c | 312 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 317 insertions(+), 3 deletions(-)
diff --git a/src/pci.h b/src/pci.h index a21a1fd..e9e191a 100644 --- a/src/pci.h +++ b/src/pci.h @@ -3,6 +3,9 @@
#include "types.h" // u32
+#define PCI_ROM_SLOT 6 +#define PCI_NUM_REGIONS 7
static inline u8 pci_bdf_to_bus(u16 bdf) { return bdf >> 8; } @@ -48,6 +51,11 @@ struct pci_device { u8 prog_if, revision; u8 header_type; u8 secondary_bus;
struct {
u32 addr;
u32 size;
int is64;
} bars[PCI_NUM_REGIONS];
// Local information on device. int have_driver;
diff --git a/src/pciinit.c b/src/pciinit.c index bfff3db..b0a712d 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,15 +12,83 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_ROM_SLOT 6 -#define PCI_NUM_REGIONS 7 +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+enum pci_region_type {
- PCI_REGION_TYPE_IO,
- PCI_REGION_TYPE_MEM,
- PCI_REGION_TYPE_PREFMEM,
- PCI_REGION_TYPE_COUNT,
+};
+static const char *region_type_name[] = {
- [ PCI_REGION_TYPE_IO ] = "io",
- [ PCI_REGION_TYPE_MEM ] = "mem",
- [ PCI_REGION_TYPE_PREFMEM ] = "prefmem",
+};
+static struct pci_bus {
- struct {
/* pci region stats */
u32 count[32 - PCI_MEM_INDEX_SHIFT];
u32 sum, max;
/* seconday bus region sizes */
u32 size;
/* pci region assignments */
u32 bases[32 - PCI_MEM_INDEX_SHIFT];
u32 base;
- } r[PCI_REGION_TYPE_COUNT];
+} *busses; +static int busses_count;
+static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus);
+static int pci_size_to_index(u32 size, enum pci_region_type type) +{
- int index = __fls(size);
- int shift = (type == PCI_REGION_TYPE_IO) ?
PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT;
- if (index < shift)
index = shift;
- index -= shift;
- return index;
+}
+static u32 pci_index_to_size(int index, enum pci_region_type type) +{
- int shift = (type == PCI_REGION_TYPE_IO) ?
PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT;
- return 1 << (index + shift);
+}
0x1 would make me more comfortable. I am guessing index + shift is almost always < 31, but still.
+static enum pci_region_type pci_addr_to_type(u32 addr) +{
- if (addr & PCI_BASE_ADDRESS_SPACE_IO)
return PCI_REGION_TYPE_IO;
- if (addr & PCI_BASE_ADDRESS_MEM_PREFETCH)
return PCI_REGION_TYPE_PREFMEM;
- return PCI_REGION_TYPE_MEM;
+}
+static u32 pci_size_roundup(u32 size) +{
- int index = __fls(size);
- return 1 << index;
+}
This actually gives a bigger value than necessary if the input is a power of 2 already. The simplest (but not the most efficient) implementation is (0x1 << (index - 1)) < size ? (0x1 << index) : size; Note: 1 << index won't work for 2^31.
/* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -442,6 +510,221 @@ pci_bios_init_bus(void) pci_bios_init_bus_rec(0 /* host bus */, &pci_bus); }
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar,
u32 *val, u32 *size)
+{
- u32 ofs = pci_bar(bdf, bar);
- u32 old = pci_config_readl(bdf, ofs);
- u32 mask;
- if (bar == PCI_ROM_SLOT) {
mask = PCI_ROM_ADDRESS_MASK;
pci_config_writel(bdf, ofs, mask);
- } else {
if (old & PCI_BASE_ADDRESS_SPACE_IO)
mask = PCI_BASE_ADDRESS_IO_MASK;
else
mask = PCI_BASE_ADDRESS_MEM_MASK;
pci_config_writel(bdf, ofs, ~0);
- }
- *val = pci_config_readl(bdf, ofs);
- pci_config_writel(bdf, ofs, old);
- *size = (~(*val & mask)) + 1;
+}
+static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) +{
- u32 index;
- index = pci_size_to_index(size, type);
- size = pci_index_to_size(index, type);
- bus->r[type].count[index]++;
- bus->r[type].sum += size;
- if (bus->r[type].max < size)
bus->r[type].max = size;
+}
+static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) +{
- u32 index, addr;
- index = pci_size_to_index(size, type);
- addr = bus->r[type].bases[index];
- bus->r[type].bases[index] += pci_index_to_size(index, type);
- return addr;
+}
+static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev) +{
- u16 bdf = dev->bdf;
- u32 limit;
- int i,type;
- if (dev->class == PCI_CLASS_BRIDGE_PCI) {
if (dev->secondary_bus >= busses_count) {
/* should never trigger */
dprintf(1, "PCI: bus count too small (%d), skipping bus #%d\n",
busses_count, dev->secondary_bus);
return;
}
struct pci_bus *s = busses + dev->secondary_bus;
would be nice to move to top of the scope.
pci_bios_check_device_in_bus(dev->secondary_bus);
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
s->r[type].size = pci_size_roundup(s->r[type].sum);
limit = (type == PCI_REGION_TYPE_IO) ?
PCI_BRIDGE_IO_MIN : PCI_BRIDGE_MEM_MIN;
if (s->r[type].size < limit)
s->r[type].size = limit;
pci_bios_bus_reserve(bus, type, s->r[type].size);
}
dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n",
dev->secondary_bus,
s->r[PCI_REGION_TYPE_IO].size,
s->r[PCI_REGION_TYPE_MEM].size,
s->r[PCI_REGION_TYPE_PREFMEM].size);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, pci_addr_to_type(val), size);
dev->bars[i].addr = val;
dev->bars[i].size = size;
dev->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64);
if (dev->bars[i].is64) {
i++;
}
- }
+}
+static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) +{
- int type, i;
- if (dev->class == PCI_CLASS_BRIDGE_PCI) {
if (dev->secondary_bus >= busses_count) {
return;
}
struct pci_bus *s = busses + dev->secondary_bus;
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
s->r[type].base = pci_bios_bus_get_addr(bus, type, s->r[type].size);
}
dprintf(1, "PCI: init bases bus %d (secondary)\n", dev->secondary_bus);
pci_bios_init_bus_bases(s);
/* TODO: commit assignments */
pci_bios_map_device_in_bus(dev->secondary_bus);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 addr;
if (dev->bars[i].addr == 0) {
continue;
}
addr = pci_bios_bus_get_addr(bus, pci_addr_to_type(dev->bars[i].addr),
dev->bars[i].size);
dprintf(1, " bar %d, addr %x, size %x [%s]\n",
i, addr, dev->bars[i].size,
dev->bars[i].addr & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem");
/* TODO: commit assignments */
if (dev->bars[i].is64) {
i++;
}
- }
+}
+static void pci_bios_check_device_in_bus(int bus) +{
- struct pci_device *pci;
- dprintf(1, "PCI: check devices bus %d\n", bus);
- foreachpci(pci) {
if (pci_bdf_to_bus(pci->bdf) != bus)
continue;
pci_bios_check_device(&busses[bus], pci);
- }
+}
+static void pci_bios_map_device_in_bus(int bus) +{
- struct pci_device *pci;
- foreachpci(pci) {
if (pci_bdf_to_bus(pci->bdf) != bus)
continue;
dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, pci->bdf);
pci_bios_map_device(&busses[bus], pci);
- }
+}
+static void pci_bios_init_bus_bases(struct pci_bus *bus) +{
- u32 base, newbase, size;
- int type, i;
- for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) {
dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type],
bus->r[type].max, bus->r[type].sum, bus->r[type].base);
base = bus->r[type].base;
for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) {
size = pci_index_to_size(i, type);
if (!bus->r[type].count[i])
continue;
newbase = base + size * bus->r[type].count[i];
dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n",
size, bus->r[type].count[i], base, newbase - 1);
bus->r[type].bases[i] = base;
base = newbase;
}
- }
+}
+#define ROOT_BASE(top, sum, align) ALIGN_DOWN((top)-(sum),(align))
+static int pci_bios_init_root_regions(u32 start, u32 end) +{
- struct pci_bus *bus = &busses[0];
- bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
- if (bus->r[PCI_REGION_TYPE_MEM].sum < bus->r[PCI_REGION_TYPE_PREFMEM].sum) {
bus->r[PCI_REGION_TYPE_MEM].base =
ROOT_BASE(end,
bus->r[PCI_REGION_TYPE_MEM].sum,
bus->r[PCI_REGION_TYPE_MEM].max);
This aligns down which looks strange to me. Shouldn't we align up to avoid overlap?
bus->r[PCI_REGION_TYPE_PREFMEM].base =
ROOT_BASE(bus->r[PCI_REGION_TYPE_MEM].base,
bus->r[PCI_REGION_TYPE_PREFMEM].sum,
bus->r[PCI_REGION_TYPE_PREFMEM].max);
if (bus->r[PCI_REGION_TYPE_PREFMEM].base >= start) {
return 0;
}
- } else {
bus->r[PCI_REGION_TYPE_PREFMEM].base =
ROOT_BASE(end,
bus->r[PCI_REGION_TYPE_PREFMEM].sum,
bus->r[PCI_REGION_TYPE_PREFMEM].max);
bus->r[PCI_REGION_TYPE_MEM].base =
ROOT_BASE(bus->r[PCI_REGION_TYPE_PREFMEM].base,
bus->r[PCI_REGION_TYPE_MEM].sum,
bus->r[PCI_REGION_TYPE_MEM].max);
if (bus->r[PCI_REGION_TYPE_MEM].base >= start) {
return 0;
}
- }
- return -1;
+}
void pci_setup(void) { @@ -453,19 +736,42 @@ pci_setup(void)
dprintf(3, "pci setup\n");
u32 start = BUILD_PCIMEM_START;
u32 end = BUILD_IOAPIC_ADDR;
pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); pci_region_init(&pci_bios_mem_region, BUILD_PCIMEM_START, BUILD_PCIMEM_END - 1); pci_region_init(&pci_bios_prefmem_region, BUILD_PCIPREFMEM_START, BUILD_PCIPREFMEM_END - 1);
dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
dprintf(1, "=== PCI device probing ===\n"); pci_probe();
dprintf(1, "=== PCI new allocation pass #1 ===\n");
busses_count = MaxPCIBus + 1;
busses = malloc_tmp(sizeof(*busses) * busses_count);
pci_bios_check_device_in_bus(0 /* host bus */);
if (pci_bios_init_root_regions(start, end) != 0) {
dprintf(1, "PCI: out of address space\n");
/* Hmm, what do now? */
stall?
- }
- dprintf(1, "=== PCI new allocation pass #2 ===\n");
- dprintf(1, "PCI: init bases bus 0 (primary)\n");
- pci_bios_init_bus_bases(&busses[0]);
- pci_bios_map_device_in_bus(0 /* host bus */);
- dprintf(1, "=== PCI old allocation pass ===\n"); struct pci_device *pci; foreachpci(pci) { pci_init_device(pci_isa_bridge_tbl, pci, NULL); } pci_bios_init_device_in_bus(0 /* host bus */);
- free(busses);
- busses_count = 0;
}
1.7.1
On Thu, Jul 07, 2011 at 07:14:11PM +0300, Michael S. Tsirkin wrote:
+static u32 pci_size_roundup(u32 size) +{
- int index = __fls(size);
- return 1 << index;
+}
This actually gives a bigger value than necessary if the input is a power of 2 already.
A roundup should look like: 1 << (__fls(size - 1) + 1) Assuming size is never less than 2.
[...]
+static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev) +{
- u16 bdf = dev->bdf;
- u32 limit;
- int i,type;
- if (dev->class == PCI_CLASS_BRIDGE_PCI) {
if (dev->secondary_bus >= busses_count) {
/* should never trigger */
dprintf(1, "PCI: bus count too small (%d), skipping bus #%d\n",
busses_count, dev->secondary_bus);
return;
}
struct pci_bus *s = busses + dev->secondary_bus;
would be nice to move to top of the scope.
SeaBIOS uses C99 heavily - I'd actually prefer to move all these definitions closer to their first usage.
-Kevin
Hi,
+#define ROOT_BASE(top, sum, align) ALIGN_DOWN((top)-(sum),(align))
+static int pci_bios_init_root_regions(u32 start, u32 end) +{
- struct pci_bus *bus =&busses[0];
- bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
- if (bus->r[PCI_REGION_TYPE_MEM].sum< bus->r[PCI_REGION_TYPE_PREFMEM].sum) {
bus->r[PCI_REGION_TYPE_MEM].base =
ROOT_BASE(end,
bus->r[PCI_REGION_TYPE_MEM].sum,
bus->r[PCI_REGION_TYPE_MEM].max);
This aligns down which looks strange to me. Shouldn't we align up to avoid overlap?
Depends on how you allocate. This starts from the top of the pci address space, so we actually have to align down to avoid overlaps.
cheers, Gerd
On Fri, Jul 08, 2011 at 09:35:39AM +0200, Gerd Hoffmann wrote:
Hi,
+#define ROOT_BASE(top, sum, align) ALIGN_DOWN((top)-(sum),(align))
+static int pci_bios_init_root_regions(u32 start, u32 end) +{
- struct pci_bus *bus =&busses[0];
- bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
- if (bus->r[PCI_REGION_TYPE_MEM].sum< bus->r[PCI_REGION_TYPE_PREFMEM].sum) {
bus->r[PCI_REGION_TYPE_MEM].base =
ROOT_BASE(end,
bus->r[PCI_REGION_TYPE_MEM].sum,
bus->r[PCI_REGION_TYPE_MEM].max);
This aligns down which looks strange to me. Shouldn't we align up to avoid overlap?
Depends on how you allocate. This starts from the top of the pci address space, so we actually have to align down to avoid overlaps.
cheers, Gerd
Hmm, but within a bridge we allocate bottom to top, right?
+static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) +{ + u32 index, addr; + + index = pci_size_to_index(size, type); + addr = bus->r[type].bases[index]; + bus->r[type].bases[index] += pci_index_to_size(index, type); + return addr; +} +
This seems to fragment the memory more than necessary: why not allocate everything from the top then?
Hmm, but within a bridge we allocate bottom to top, right?
Yes. Just the big mem and prefmem blocks which contain everything are allocated top-down, exactly to avoid fragmenting the memory by moving stuff as far as possible to the top of the address space, leaving a big hole between end of guest ram and start of pci allocations.
Within the memory blocks the bars (and bridge windows) are allocated bottom-up, sorted by size, with no alignment holes between the bars.
The memory layout created this way looks like this:
00000000-00000fff : reserved
00001000-0009efff : System RAM
0009f000-0009ffff : reserved
000f0000-000fffff : reserved
00100000-3fffbfff : System RAM
  01000000-01472d32 : Kernel code
  01472d33-01b8872f : Kernel data
  01c7b000-01e438b7 : Kernel bss
3fffc000-3fffffff : reserved
^^^ RAM
fc000000-fdffffff : 0000:00:02.0
fe000000-fe0fffff : PCI Bus 0000:01
fe100000-fe1fffff : PCI Bus 0000:02
^^^ prefetchable memory block
The whole block is aligned-down according to the alignment requirements of the largest bar (vga framebuffer in that case).
fe900000-fe9fffff : PCI Bus 0000:01
  fe900000-fe901fff : 0000:01:01.0
    fe900000-fe901fff : sym53c8xx
  fe902000-fe902fff : 0000:01:00.0
    fe902000-fe902fff : ehci_hcd
  fe903000-fe9033ff : 0000:01:01.0
    fe903000-fe9033ff : sym53c8xx
fea00000-feafffff : PCI Bus 0000:02
feb00000-feb0ffff : 0000:00:02.0
feb10000-feb1ffff : 0000:00:03.0
feb20000-feb23fff : 0000:00:08.0
  feb20000-feb23fff : ICH HD audio
feb24000-feb24fff : 0000:00:02.0
feb25000-feb25fff : 0000:00:03.0
  feb25000-feb25fff : virtio-pci
feb26000-feb26fff : 0000:00:10.0
  feb26000-feb26fff : virtio-pci
feb27000-feb27fff : 0000:00:11.0
  feb27000-feb27fff : virtio-pci
feb28000-feb28fff : 0000:00:12.0
  feb28000-feb28fff : virtio-pci
^^^ normal memory block
Also aligned down according to the requirements of the largest bar (pci bridge memory window in that case).
fec00000-fec003ff : IOAPIC 0
fed00000-fed003ff : HPET 0
fee00000-fee00fff : Local APIC
feffc000-feffffff : reserved
fffc0000-ffffffff : reserved
The code sizes the bridge windows to be a power of two right now. That could be changed. It would make the allocation code more complicated though and I suspect it wouldn't make the packing much better. Also we want some free address space for the bridges anyway so we can hotplug stuff, so rounding up the size to the next power of two doesn't hurt much IMHO.
cheers, Gerd
On 07/08/11 17:09, Gerd Hoffmann wrote:
Hmm, but within a bridge we allocate bottom to top, right?
Yes. Just the big mem and prefmem blocks which contain everything are allocated top-down, exactly to avoid fragmenting the memory by moving stuff as far as possible to the top of the address space, leaving a big hole between end of guest ram and start of pci allocations.
Within the memory blocks the bars (and bridge windows) are allocated bottom-up, sorted by size, with no alignment holes between the bars.
Pushed latest bits to http://www.kraxel.org/cgit/seabios/log/?h=pci.4 for those who want to play with it. Will post updated series to the list next week.
cheers, Gerd
Hi,
0x1 would make me more comfortable. I am guessing index + shift is almost always < 31, but still.
fixed.
This actually gives a bigger value than necessary if the input is a power of 2 already.
fixed.
struct pci_bus *s = busses + dev->secondary_bus;
would be nice to move to top of the scope.
that is actually seabios code style ;)
- if (pci_bios_init_root_regions(start, end) != 0) {
dprintf(1, "PCI: out of address space\n");
/* Hmm, what do now? */
stall?
patch #2 makes that a panic.
cheers, Gerd
Michael S. Tsirkin wrote:
]> +static u32 pci_index_to_size(int index, enum pci_region_type type) ]> +{ ]> + int shift = (type == PCI_REGION_TYPE_IO) ? ]> + PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; ]> + ]> + return 1 << (index + shift); ]> +} ]> + ] ]0x1 would make me more comfortable. ]I am guessing index + shift is almost always < 31, ]but still.
Hello Michael,
I don't understand how 0x1 would differ from 1 above. Is it a cosmetic preference only?
Thanks, Scott
This patch actually enables the two-pass pci initialization and deactivates the old pci initialization bits.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pciinit.c | 35 +++++++++++++++++++++++++++++++++-- 1 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index b0a712d..eeb4052 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -123,6 +123,8 @@ static void pci_set_io_region_addr(u16 bdf, int region_num, u32 addr) */ static int pci_bios_allocate_region(u16 bdf, int region_num) { + return 0; + struct pci_region *r; u32 ofs = pci_bar(bdf, region_num);
@@ -184,6 +186,8 @@ static int pci_bios_allocate_region(u16 bdf, int region_num)
static void pci_bios_allocate_regions(struct pci_device *pci, void *arg) { + return; + int i; for (i = 0; i < PCI_NUM_REGIONS; i++) { int is_64bit = pci_bios_allocate_region(pci->bdf, i); @@ -239,6 +243,7 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { #define PCI_PREF_MEMORY_ALIGN (1UL << 20) #define PCI_PREF_MEMORY_SHIFT 16
+#if 0 static void pci_bios_init_device_bridge(struct pci_device *pci, void *arg) { u16 bdf = pci->bdf; @@ -323,6 +328,7 @@ static void pci_bios_init_device_bridge(struct pci_device *pci, void *arg)
pci_config_maskw(bdf, PCI_BRIDGE_CONTROL, 0, PCI_BRIDGE_CTL_SERR); } +#endif
static void storage_ide_init(struct pci_device *pci, void *arg) { @@ -374,9 +380,11 @@ static const struct pci_device_id pci_class_tbl[] = { PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_init), PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_init),
+#if 0 /* PCI bridge */ PCI_DEVICE_CLASS(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, pci_bios_init_device_bridge), +#endif
/* default */ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID, pci_bios_allocate_regions), @@ -605,6 +613,7 @@ static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev)
static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) { + u16 bdf = dev->bdf; int type, i;
if (dev->class == PCI_CLASS_BRIDGE_PCI) { @@ -612,13 +621,32 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) return; } struct pci_bus *s = busses + dev->secondary_bus; + u32 base, limit;
for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { s->r[type].base = pci_bios_bus_get_addr(bus, type, s->r[type].size); } dprintf(1, "PCI: init bases bus %d (secondary)\n", dev->secondary_bus); pci_bios_init_bus_bases(s); - /* TODO: commit assignments */ + + base = s->r[PCI_REGION_TYPE_IO].base; + limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; + pci_config_writeb(bdf, PCI_IO_BASE, base >> PCI_IO_SHIFT); + pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); + pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT); + pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0); + + base = s->r[PCI_REGION_TYPE_MEM].base; + limit = base + s->r[PCI_REGION_TYPE_MEM].size - 1; + pci_config_writew(bdf, PCI_MEMORY_BASE, base >> PCI_MEMORY_SHIFT); + pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT); + + base = s->r[PCI_REGION_TYPE_PREFMEM].base; + limit = base + s->r[PCI_REGION_TYPE_PREFMEM].size - 1; + pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, base >> PCI_PREF_MEMORY_SHIFT); + pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> PCI_PREF_MEMORY_SHIFT); + pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0); + pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0);
pci_bios_map_device_in_bus(dev->secondary_bus); return; @@ -635,7 +663,7 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) dprintf(1, " bar %d, addr %x, size %x [%s]\n", i, addr, dev->bars[i].size, dev->bars[i].addr & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem"); - /* TODO: commit assignments */ + pci_set_io_region_addr(bdf, i, addr);
if (dev->bars[i].is64) { i++; @@ -765,11 +793,14 @@ pci_setup(void) pci_bios_init_bus_bases(&busses[0]); pci_bios_map_device_in_bus(0 /* host bus */);
+#if 0 dprintf(1, "=== PCI old allocation pass ===\n"); struct pci_device *pci; foreachpci(pci) { pci_init_device(pci_isa_bridge_tbl, pci, NULL); } +#endif + pci_bios_init_device_in_bus(0 /* host bus */);
free(busses);
On Tue, Jul 05, 2011 at 05:27:01PM +0200, Gerd Hoffmann wrote:
This patch actually enables the two-pass pci initialization and deactivates the old pci initialization bits.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
Do people really want to keep old code ifdef'd in this way?
src/pciinit.c | 35 +++++++++++++++++++++++++++++++++-- 1 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index b0a712d..eeb4052 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -123,6 +123,8 @@ static void pci_set_io_region_addr(u16 bdf, int region_num, u32 addr) */ static int pci_bios_allocate_region(u16 bdf, int region_num) {
- return 0;
- struct pci_region *r; u32 ofs = pci_bar(bdf, region_num);
@@ -184,6 +186,8 @@ static int pci_bios_allocate_region(u16 bdf, int region_num)
static void pci_bios_allocate_regions(struct pci_device *pci, void *arg) {
- return;
- int i; for (i = 0; i < PCI_NUM_REGIONS; i++) { int is_64bit = pci_bios_allocate_region(pci->bdf, i);
@@ -239,6 +243,7 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { #define PCI_PREF_MEMORY_ALIGN (1UL << 20) #define PCI_PREF_MEMORY_SHIFT 16
+#if 0 static void pci_bios_init_device_bridge(struct pci_device *pci, void *arg) { u16 bdf = pci->bdf; @@ -323,6 +328,7 @@ static void pci_bios_init_device_bridge(struct pci_device *pci, void *arg)
pci_config_maskw(bdf, PCI_BRIDGE_CONTROL, 0, PCI_BRIDGE_CTL_SERR);
} +#endif
static void storage_ide_init(struct pci_device *pci, void *arg) { @@ -374,9 +380,11 @@ static const struct pci_device_id pci_class_tbl[] = { PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_init), PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_init),
+#if 0 /* PCI bridge */ PCI_DEVICE_CLASS(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, pci_bios_init_device_bridge), +#endif
/* default */ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID, pci_bios_allocate_regions),
@@ -605,6 +613,7 @@ static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev)
static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) {
u16 bdf = dev->bdf; int type, i;
if (dev->class == PCI_CLASS_BRIDGE_PCI) {
@@ -612,13 +621,32 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) return; } struct pci_bus *s = busses + dev->secondary_bus;
u32 base, limit; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { s->r[type].base = pci_bios_bus_get_addr(bus, type, s->r[type].size); } dprintf(1, "PCI: init bases bus %d (secondary)\n", dev->secondary_bus); pci_bios_init_bus_bases(s);
/* TODO: commit assignments */
base = s->r[PCI_REGION_TYPE_IO].base;
limit = base + s->r[PCI_REGION_TYPE_IO].size - 1;
pci_config_writeb(bdf, PCI_IO_BASE, base >> PCI_IO_SHIFT);
pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0);
pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT);
pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0);
base = s->r[PCI_REGION_TYPE_MEM].base;
limit = base + s->r[PCI_REGION_TYPE_MEM].size - 1;
pci_config_writew(bdf, PCI_MEMORY_BASE, base >> PCI_MEMORY_SHIFT);
pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT);
base = s->r[PCI_REGION_TYPE_PREFMEM].base;
limit = base + s->r[PCI_REGION_TYPE_PREFMEM].size - 1;
pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, base >> PCI_PREF_MEMORY_SHIFT);
pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> PCI_PREF_MEMORY_SHIFT);
pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0);
pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0); pci_bios_map_device_in_bus(dev->secondary_bus); return;
@@ -635,7 +663,7 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) dprintf(1, " bar %d, addr %x, size %x [%s]\n", i, addr, dev->bars[i].size, dev->bars[i].addr & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem");
/* TODO: commit assignments */
pci_set_io_region_addr(bdf, i, addr); if (dev->bars[i].is64) { i++;
@@ -765,11 +793,14 @@ pci_setup(void) pci_bios_init_bus_bases(&busses[0]); pci_bios_map_device_in_bus(0 /* host bus */);
+#if 0 dprintf(1, "=== PCI old allocation pass ===\n"); struct pci_device *pci; foreachpci(pci) { pci_init_device(pci_isa_bridge_tbl, pci, NULL); } +#endif
pci_bios_init_device_in_bus(0 /* host bus */);
free(busses);
-- 1.7.1
On 07/07/11 18:16, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:01PM +0200, Gerd Hoffmann wrote:
This patch actually enables the two-pass pci initialization and deactivates the old pci initialization bits.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
Do people really want to keep old code ifdef'd in this way?
Patch #3 kills it ...
The reason I did it this way is to ease troubleshooting in case problems show up: one patch adds the new code but makes it print stuff only, one patch activates the new code but leaves the old code in place, and one patch removes the obsolete bits.
cheers, Gerd
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- Makefile | 2 +- src/pci_region.c | 77 --------------------- src/pciinit.c | 197 ------------------------------------------------------ src/util.h | 29 -------- 4 files changed, 1 insertions(+), 304 deletions(-) delete mode 100644 src/pci_region.c
diff --git a/Makefile b/Makefile index a96c89d..7234b26 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ SRC16=$(SRCBOTH) system.c disk.c font.c SRC32FLAT=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \ acpi.c smm.c mptable.c smbios.c pciinit.c optionroms.c mtrr.c \ lzmadecode.c bootsplash.c jpeg.c usb-hub.c paravirt.c \ - pci_region.c biostables.c xen.c + biostables.c xen.c SRC32SEG=util.c output.c pci.c pcibios.c apm.c stacks.c
cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \ diff --git a/src/pci_region.c b/src/pci_region.c deleted file mode 100644 index 1d9de71..0000000 --- a/src/pci_region.c +++ /dev/null @@ -1,77 +0,0 @@ -// helper functions to manage pci io/memory/prefetch memory region -// -// Copyright (C) 2009 Isaku Yamahata <yamahata at valinux co jp> -// -// This file may be distributed under the terms of the GNU LGPLv3 license. -// -// - -#include "util.h" - -#define PCI_REGION_DISABLED (-1) - -void pci_region_init(struct pci_region *r, u32 first, u32 last) -{ - r->first = first; - r->last = last; - - r->cur_first = r->first; -} - -// PCI_REGION_DISABLED represents that the region is in special state. -// its value is chosen such that cur_first can't be PCI_REGION_DISABLED -// normally. -// NOTE: the area right below 4G is used for LAPIC, so such area can't -// be used for PCI memory. -u32 pci_region_disable(struct pci_region *r) -{ - return r->cur_first = PCI_REGION_DISABLED; -} - -static int pci_region_disabled(const struct pci_region *r) -{ - return r->cur_first == PCI_REGION_DISABLED; -} - -static u32 pci_region_alloc_align(struct pci_region *r, u32 size, u32 align) -{ - if (pci_region_disabled(r)) { - return 0; - } - - u32 s = ALIGN(r->cur_first, align); - if (s > r->last || s < r->cur_first) { - return 0; - } - u32 e = s + size; - if (e < s || e - 1 > r->last) { - return 0; - } - r->cur_first = e; - return s; -} - -u32 pci_region_alloc(struct pci_region *r, u32 size) -{ - return pci_region_alloc_align(r, size, size); -} - -u32 pci_region_align(struct pci_region *r, u32 align) -{ - return pci_region_alloc_align(r, 0, align); -} - -void pci_region_revert(struct pci_region *r, u32 addr) -{ - r->cur_first = addr; -} - -u32 pci_region_addr(const struct pci_region *r) -{ - return r->cur_first; -} - -u32 pci_region_size(const struct pci_region *r) -{ - return r->last - r->first + 1; -} diff --git a/src/pciinit.c b/src/pciinit.c index eeb4052..aafdf7c 100644 --- 
a/src/pciinit.c +++ b/src/pciinit.c @@ -18,10 +18,6 @@ #define PCI_BRIDGE_IO_MIN 0x100 #define PCI_BRIDGE_MEM_MIN 0x100000
-static struct pci_region pci_bios_io_region; -static struct pci_region pci_bios_mem_region; -static struct pci_region pci_bios_prefmem_region; - enum pci_region_type { PCI_REGION_TYPE_IO, PCI_REGION_TYPE_MEM, @@ -113,88 +109,6 @@ static void pci_set_io_region_addr(u16 bdf, int region_num, u32 addr) ofs = pci_bar(bdf, region_num);
pci_config_writel(bdf, ofs, addr); - dprintf(1, "region %d: 0x%08x\n", region_num, addr); -} - -/* - * return value - * 0: 32bit BAR - * non 0: 64bit BAR - */ -static int pci_bios_allocate_region(u16 bdf, int region_num) -{ - return 0; - - struct pci_region *r; - u32 ofs = pci_bar(bdf, region_num); - - u32 old = pci_config_readl(bdf, ofs); - u32 mask; - if (region_num == PCI_ROM_SLOT) { - mask = PCI_ROM_ADDRESS_MASK; - pci_config_writel(bdf, ofs, mask); - } else { - if (old & PCI_BASE_ADDRESS_SPACE_IO) - mask = PCI_BASE_ADDRESS_IO_MASK; - else - mask = PCI_BASE_ADDRESS_MEM_MASK; - pci_config_writel(bdf, ofs, ~0); - } - u32 val = pci_config_readl(bdf, ofs); - pci_config_writel(bdf, ofs, old); - - u32 size = (~(val & mask)) + 1; - if (val != 0) { - const char *type; - const char *msg; - if (val & PCI_BASE_ADDRESS_SPACE_IO) { - r = &pci_bios_io_region; - type = "io"; - msg = ""; - } else if ((val & PCI_BASE_ADDRESS_MEM_PREFETCH) && - /* keep behaviour on bus = 0 */ - pci_bdf_to_bus(bdf) != 0 && - /* If pci_bios_prefmem_addr == 0, keep old behaviour */ - pci_region_addr(&pci_bios_prefmem_region) != 0) { - r = &pci_bios_prefmem_region; - type = "prefmem"; - msg = "decrease BUILD_PCIMEM_SIZE and recompile. size %x"; - } else { - r = &pci_bios_mem_region; - type = "mem"; - msg = "increase BUILD_PCIMEM_SIZE and recompile."; - } - u32 addr = pci_region_alloc(r, size); - if (addr > 0) { - pci_set_io_region_addr(bdf, region_num, addr); - } else { - size = 0; - dprintf(1, - "%s region of (bdf 0x%x bar %d) can't be mapped. 
" - "%s size %x\n", - type, bdf, region_num, msg, pci_region_size(r)); - } - } - - int is_64bit = !(val & PCI_BASE_ADDRESS_SPACE_IO) && - (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64; - if (is_64bit && size > 0) { - pci_config_writel(bdf, ofs + 4, 0); - } - return is_64bit; -} - -static void pci_bios_allocate_regions(struct pci_device *pci, void *arg) -{ - return; - - int i; - for (i = 0; i < PCI_NUM_REGIONS; i++) { - int is_64bit = pci_bios_allocate_region(pci->bdf, i); - if (is_64bit){ - i++; - } - } }
/* return the global irq number corresponding to a given device irq @@ -243,93 +157,6 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { #define PCI_PREF_MEMORY_ALIGN (1UL << 20) #define PCI_PREF_MEMORY_SHIFT 16
-#if 0 -static void pci_bios_init_device_bridge(struct pci_device *pci, void *arg) -{ - u16 bdf = pci->bdf; - pci_bios_allocate_region(bdf, 0); - pci_bios_allocate_region(bdf, 1); - pci_bios_allocate_region(bdf, PCI_ROM_SLOT); - - u32 io_old = pci_region_addr(&pci_bios_io_region); - u32 mem_old = pci_region_addr(&pci_bios_mem_region); - u32 prefmem_old = pci_region_addr(&pci_bios_prefmem_region); - - /* IO BASE is assumed to be 16 bit */ - if (pci_region_align(&pci_bios_io_region, PCI_IO_ALIGN) == 0) { - pci_region_disable(&pci_bios_io_region); - } - if (pci_region_align(&pci_bios_mem_region, PCI_MEMORY_ALIGN) == 0) { - pci_region_disable(&pci_bios_mem_region); - } - if (pci_region_align(&pci_bios_prefmem_region, - PCI_PREF_MEMORY_ALIGN) == 0) { - pci_region_disable(&pci_bios_prefmem_region); - } - - u32 io_base = pci_region_addr(&pci_bios_io_region); - u32 mem_base = pci_region_addr(&pci_bios_mem_region); - u32 prefmem_base = pci_region_addr(&pci_bios_prefmem_region); - - u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS); - if (secbus > 0) { - pci_bios_init_device_in_bus(secbus); - } - - u32 io_end = pci_region_align(&pci_bios_io_region, PCI_IO_ALIGN); - if (io_end == 0) { - pci_region_revert(&pci_bios_io_region, io_old); - io_base = 0xffff; - io_end = 1; - } - pci_config_writeb(bdf, PCI_IO_BASE, io_base >> PCI_IO_SHIFT); - pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); - pci_config_writeb(bdf, PCI_IO_LIMIT, (io_end - 1) >> PCI_IO_SHIFT); - pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0); - - u32 mem_end = pci_region_align(&pci_bios_mem_region, PCI_MEMORY_ALIGN); - if (mem_end == 0) { - pci_region_revert(&pci_bios_mem_region, mem_old); - mem_base = 0xffffffff; - mem_end = 1; - } - pci_config_writew(bdf, PCI_MEMORY_BASE, mem_base >> PCI_MEMORY_SHIFT); - pci_config_writew(bdf, PCI_MEMORY_LIMIT, (mem_end -1) >> PCI_MEMORY_SHIFT); - - u32 prefmem_end = pci_region_align(&pci_bios_prefmem_region, - PCI_PREF_MEMORY_ALIGN); - if (prefmem_end == 0) { - 
pci_region_revert(&pci_bios_prefmem_region, prefmem_old); - prefmem_base = 0xffffffff; - prefmem_end = 1; - } - pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, - prefmem_base >> PCI_PREF_MEMORY_SHIFT); - pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, - (prefmem_end - 1) >> PCI_PREF_MEMORY_SHIFT); - pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0); - pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0); - - dprintf(1, "PCI: br io = [0x%x, 0x%x)\n", io_base, io_end); - dprintf(1, "PCI: br mem = [0x%x, 0x%x)\n", mem_base, mem_end); - dprintf(1, "PCI: br pref = [0x%x, 0x%x)\n", prefmem_base, prefmem_end); - - u16 cmd = pci_config_readw(bdf, PCI_COMMAND); - cmd &= ~PCI_COMMAND_IO; - if (io_end > io_base) { - cmd |= PCI_COMMAND_IO; - } - cmd &= ~PCI_COMMAND_MEMORY; - if (mem_end > mem_base || prefmem_end > prefmem_base) { - cmd |= PCI_COMMAND_MEMORY; - } - cmd |= PCI_COMMAND_MASTER; - pci_config_writew(bdf, PCI_COMMAND, cmd); - - pci_config_maskw(bdf, PCI_BRIDGE_CONTROL, 0, PCI_BRIDGE_CTL_SERR); -} -#endif - static void storage_ide_init(struct pci_device *pci, void *arg) { u16 bdf = pci->bdf; @@ -346,7 +173,6 @@ static void piix_ide_init(struct pci_device *pci, void *arg) u16 bdf = pci->bdf; pci_config_writew(bdf, 0x40, 0x8000); // enable IDE0 pci_config_writew(bdf, 0x42, 0x8000); // enable IDE1 - pci_bios_allocate_regions(pci, NULL); }
static void pic_ibm_init(struct pci_device *pci, void *arg) @@ -380,15 +206,6 @@ static const struct pci_device_id pci_class_tbl[] = { PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_init), PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_init),
-#if 0 - /* PCI bridge */ - PCI_DEVICE_CLASS(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, - pci_bios_init_device_bridge), -#endif - - /* default */ - PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID, pci_bios_allocate_regions), - PCI_DEVICE_END, };
@@ -767,12 +584,6 @@ pci_setup(void) u32 start = BUILD_PCIMEM_START; u32 end = BUILD_IOAPIC_ADDR;
- pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); - pci_region_init(&pci_bios_mem_region, - BUILD_PCIMEM_START, BUILD_PCIMEM_END - 1); - pci_region_init(&pci_bios_prefmem_region, - BUILD_PCIPREFMEM_START, BUILD_PCIPREFMEM_END - 1); - dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
@@ -793,14 +604,6 @@ pci_setup(void) pci_bios_init_bus_bases(&busses[0]); pci_bios_map_device_in_bus(0 /* host bus */);
-#if 0 - dprintf(1, "=== PCI old allocation pass ===\n"); - struct pci_device *pci; - foreachpci(pci) { - pci_init_device(pci_isa_bridge_tbl, pci, NULL); - } -#endif - pci_bios_init_device_in_bus(0 /* host bus */);
free(busses); diff --git a/src/util.h b/src/util.h index 7034d43..d856d50 100644 --- a/src/util.h +++ b/src/util.h @@ -344,35 +344,6 @@ void make_bios_writable(void); void make_bios_readonly(void); void qemu_prep_reset(void);
-// pci_region.c -// region allocator. pci region allocates the requested region -// sequentially with overflow check. -struct pci_region { - // The region is [first, last]. - u32 first; - u32 last; - - // The next allocation starts from here. - // i.e. [start, cur_first) is allocated. - // Right after initialization cur_first == first. - u32 cur_first; -}; -// initialize the pci_region of [first, last] -// last must not be 0xffffffff -void pci_region_init(struct pci_region *r, u32 first, u32 last); -// allocate the region of size -u32 pci_region_alloc(struct pci_region *r, u32 size); -// make the next allocation aligned to align -u32 pci_region_align(struct pci_region *r, u32 align); -// revert the allocation to addr. -void pci_region_revert(struct pci_region *r, u32 addr); -// make the allocation fail. -u32 pci_region_disable(struct pci_region *r); -// returns the current allocation point. -u32 pci_region_addr(const struct pci_region *r); -// returns the region size. -u32 pci_region_size(const struct pci_region *r); - // pciinit.c extern const u8 pci_irqs[4]; void pci_setup(void);
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pci.c | 3 +++ src/pciinit.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/util.h | 2 ++ 3 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/src/pci.c b/src/pci.c index 78bbac2..af735b2 100644 --- a/src/pci.c +++ b/src/pci.c @@ -193,6 +193,9 @@ pci_find_class(u16 classid) int pci_init_device(const struct pci_device_id *ids , struct pci_device *pci, void *arg) { + if (!pci_enable_device(pci)) + return -1; + while (ids->vendid || ids->class_mask) { if ((ids->vendid == PCI_ANY_ID || ids->vendid == pci->vendor) && (ids->devid == PCI_ANY_ID || ids->devid == pci->device) && diff --git a/src/pciinit.c b/src/pciinit.c index aafdf7c..8b95132 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -44,6 +44,7 @@ static struct pci_bus { } r[PCI_REGION_TYPE_COUNT]; } *busses; static int busses_count; +static int boot_devices_only = 0;
static void pci_bios_init_device_in_bus(int bus); static void pci_bios_check_device_in_bus(int bus); @@ -157,6 +158,40 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { #define PCI_PREF_MEMORY_ALIGN (1UL << 20) #define PCI_PREF_MEMORY_SHIFT 16
+static int is_boot_device(struct pci_device *pci) +{ + /* check base class */ + switch (pci->class >> 8) { + case PCI_BASE_CLASS_STORAGE: + case PCI_BASE_CLASS_NETWORK: + case PCI_BASE_CLASS_BRIDGE: + case PCI_BASE_CLASS_SYSTEM: + return 1; + } + + /* check class */ + switch (pci->class) { + case PCI_CLASS_DISPLAY_VGA: + case PCI_CLASS_SERIAL_USB: + return 1; + } + + /* check if ROM present */ + if (pci->bars[PCI_ROM_SLOT].addr != 0) { + return 1; + } + + return 0; +} + +int pci_enable_device(struct pci_device *pci) +{ + if (boot_devices_only) { + return is_boot_device(pci); + } + return 1; +} + static void storage_ide_init(struct pci_device *pci, void *arg) { u16 bdf = pci->bdf; @@ -385,6 +420,9 @@ static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev) u32 limit; int i,type;
+ if (!pci_enable_device(dev)) + return; + if (dev->class == PCI_CLASS_BRIDGE_PCI) { if (dev->secondary_bus >= busses_count) { /* should never trigger */ @@ -433,6 +471,9 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) u16 bdf = dev->bdf; int type, i;
+ if (!pci_enable_device(dev)) + return; + if (dev->class == PCI_CLASS_BRIDGE_PCI) { if (dev->secondary_bus >= busses_count) { return; @@ -595,8 +636,14 @@ pci_setup(void) busses = malloc_tmp(sizeof(*busses) * busses_count); pci_bios_check_device_in_bus(0 /* host bus */); if (pci_bios_init_root_regions(start, end) != 0) { - dprintf(1, "PCI: out of address space\n"); - /* Hmm, what do now? */ + dprintf(1, "=== PCI new allocation pass #1a (boot only) ===\n"); + boot_devices_only = 1; + memset(busses, 0, busses_count * sizeof(struct pci_bus)); + pci_bios_check_device_in_bus(0 /* host bus */); + if (pci_bios_init_root_regions(start, end) != 0) { + dprintf(1, "PCI: out of address space\n"); + /* Hmm, what do now? */ + } }
dprintf(1, "=== PCI new allocation pass #2 ===\n"); diff --git a/src/util.h b/src/util.h index d856d50..8020181 100644 --- a/src/util.h +++ b/src/util.h @@ -347,6 +347,8 @@ void qemu_prep_reset(void); // pciinit.c extern const u8 pci_irqs[4]; void pci_setup(void); +struct pci_device; +int pci_enable_device(struct pci_device *pci);
// smm.c void smm_init(void);
On Tue, Jul 05, 2011 at 05:27:03PM +0200, Gerd Hoffmann wrote:
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
At least in a virt setup, it's much easier to debug things if boot just fails. Partial boot could be an option I guess. We usually have a list of bootable devices we got from qemu - want to use that?
src/pci.c | 3 +++ src/pciinit.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/util.h | 2 ++ 3 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/src/pci.c b/src/pci.c index 78bbac2..af735b2 100644 --- a/src/pci.c +++ b/src/pci.c @@ -193,6 +193,9 @@ pci_find_class(u16 classid) int pci_init_device(const struct pci_device_id *ids , struct pci_device *pci, void *arg) {
- if (!pci_enable_device(pci))
return -1;
- while (ids->vendid || ids->class_mask) { if ((ids->vendid == PCI_ANY_ID || ids->vendid == pci->vendor) && (ids->devid == PCI_ANY_ID || ids->devid == pci->device) &&
diff --git a/src/pciinit.c b/src/pciinit.c index aafdf7c..8b95132 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -44,6 +44,7 @@ static struct pci_bus { } r[PCI_REGION_TYPE_COUNT]; } *busses; static int busses_count; +static int boot_devices_only = 0;
static void pci_bios_init_device_in_bus(int bus); static void pci_bios_check_device_in_bus(int bus); @@ -157,6 +158,40 @@ static const struct pci_device_id pci_isa_bridge_tbl[] = { #define PCI_PREF_MEMORY_ALIGN (1UL << 20) #define PCI_PREF_MEMORY_SHIFT 16
+static int is_boot_device(struct pci_device *pci) +{
- /* check base class */
- switch (pci->class >> 8) {
- case PCI_BASE_CLASS_STORAGE:
- case PCI_BASE_CLASS_NETWORK:
- case PCI_BASE_CLASS_BRIDGE:
- case PCI_BASE_CLASS_SYSTEM:
return 1;
- }
- /* check class */
- switch (pci->class) {
- case PCI_CLASS_DISPLAY_VGA:
- case PCI_CLASS_SERIAL_USB:
return 1;
- }
- /* check if ROM present */
- if (pci->bars[PCI_ROM_SLOT].addr != 0) {
return 1;
- }
- return 0;
One worry here is that this list might not be exhaustive.
+}
+int pci_enable_device(struct pci_device *pci) +{
- if (boot_devices_only) {
return is_boot_device(pci);
- }
- return 1;
+}
static void storage_ide_init(struct pci_device *pci, void *arg) { u16 bdf = pci->bdf; @@ -385,6 +420,9 @@ static void pci_bios_check_device(struct pci_bus *bus, struct pci_device *dev) u32 limit; int i,type;
- if (!pci_enable_device(dev))
return;
- if (dev->class == PCI_CLASS_BRIDGE_PCI) { if (dev->secondary_bus >= busses_count) { /* should never trigger */
@@ -433,6 +471,9 @@ static void pci_bios_map_device(struct pci_bus *bus, struct pci_device *dev) u16 bdf = dev->bdf; int type, i;
- if (!pci_enable_device(dev))
return;
- if (dev->class == PCI_CLASS_BRIDGE_PCI) { if (dev->secondary_bus >= busses_count) { return;
@@ -595,8 +636,14 @@ pci_setup(void) busses = malloc_tmp(sizeof(*busses) * busses_count); pci_bios_check_device_in_bus(0 /* host bus */); if (pci_bios_init_root_regions(start, end) != 0) {
dprintf(1, "PCI: out of address space\n");
/* Hmm, what do now? */
dprintf(1, "=== PCI new allocation pass #1a (boot only) ===\n");
boot_devices_only = 1;
memset(busses, 0, busses_count * sizeof(struct pci_bus));
pci_bios_check_device_in_bus(0 /* host bus */);
if (pci_bios_init_root_regions(start, end) != 0) {
dprintf(1, "PCI: out of address space\n");
/* Hmm, what do now? */
}
}
dprintf(1, "=== PCI new allocation pass #2 ===\n");
diff --git a/src/util.h b/src/util.h index d856d50..8020181 100644 --- a/src/util.h +++ b/src/util.h @@ -347,6 +347,8 @@ void qemu_prep_reset(void); // pciinit.c extern const u8 pci_irqs[4]; void pci_setup(void); +struct pci_device; +int pci_enable_device(struct pci_device *pci);
// smm.c void smm_init(void); -- 1.7.1
On 07/05/11 18:23, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:03PM +0200, Gerd Hoffmann wrote:
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
At least in a virt setup, it's much easier to debug things if boot just fails. Partial boot could be an option I guess.
Yea, I think that is pretty much the fundamental question. Does it make sense to try to boot up even if we can't fit some devices into the pci memory hole?
At least linux guests will try to map devices below the pci memory hole in case seabios didn't assign an address. Of course this works only if the guest hasn't too much memory so there is some free space between end of ram and the start of the pci memory hole.
We usually have a list of bootable devices we got from qemu - want to use that?
Why? seabios knows itself which devices it can use to boot. Also the list from qemu is incomplete, the boot menu can have more entries than what we get passed in from qemu as boot order list.
cheers, Gerd
On Wed, Jul 06, 2011 at 10:57:41AM +0200, Gerd Hoffmann wrote:
On 07/05/11 18:23, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:03PM +0200, Gerd Hoffmann wrote:
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
At least in a virt setup, it's much easier to debug things if boot just fails. Partial boot could be an option I guess.
Yea, I think that is pretty much the fundamental question. Does it make sense to try to boot up even if we can't fit some devices into the pci memory hole?
At least linux guests will try to map devices below the pci memory hole in case seabios didn't assign an address. Of course this works only if the guest hasn't too much memory so there is some free space between end of ram and the start of the pci memory hole.
We usually have a list of bootable devices we got from qemu - want to use that?
Why? seabios knows itself which devices it can use to boot. Also the list from qemu is incomplete, the boot menu can have more entries than what we get passed in from qemu as boot order list.
Correct. Boot order contains only devices with assigned priorities. It still makes sense to try to configure them first, since if the user assigned a boot priority he expects to boot from one of them. I wouldn't complicate the patch too much for that pathological case though.
-- Gleb.
On Wed, Jul 06, 2011 at 10:57:41AM +0200, Gerd Hoffmann wrote:
On 07/05/11 18:23, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:03PM +0200, Gerd Hoffmann wrote:
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
At least in a virt setup, it's much easier to debug things if boot just fails. Partial boot could be an option I guess.
Yea, I think that is pretty much the fundamental question. Does it make sense to try to boot up even if we can't fit some devices into the pci memory hole?
Well, the real fix is for the management to remove the offending devices, so I think we should make it as easy as possible to detect the problem and implement the fix.
We could also implement an interface allowing the user to select which devices to disable.
At least linux guests will try to map devices below the pci memory hole in case seabios didn't assign an address.
Of course this works only if the guest hasn't too much memory so there is some free space between end of ram and the start of the pci memory hole.
IO port memory is a problem too, BTW, and not that easily worked around. It's becoming rare in the non-virtualized systems but is unfortunately the mechanism of choice for virtualization.
We usually have a list of bootable devices we got from qemu - want to use that?
Why? seabios knows itself which devices it can use to boot. Also the list from qemu is incomplete, the boot menu can have more entries than what we get passed in from qemu as boot order list.
cheers, Gerd
On Wed, Jul 06, 2011 at 12:18:13PM +0300, Michael S. Tsirkin wrote:
On Wed, Jul 06, 2011 at 10:57:41AM +0200, Gerd Hoffmann wrote:
On 07/05/11 18:23, Michael S. Tsirkin wrote:
On Tue, Jul 05, 2011 at 05:27:03PM +0200, Gerd Hoffmann wrote:
Try to handle address space shortage by skipping any device which isn't essential for boot.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
At least in a virt setup, it's much easier to debug things if boot just fails. Partial boot could be an option I guess.
Yea, I think that is pretty much the fundamental question. Does it make sense to try to boot up even if we can't fit some devices into the pci memory hole?
Well, the real fix is for the management to remove the offending devices, so I think we should make it as easy as possible to detect the problem and implement the fix.
We could also implement an interface allowing the user to select which devices to disable.
An interface that requires device identification between qemu and the BIOS involves device paths. Not sure you want to go there :) (Although for PCI devices this is a solved problem).
At least linux guests will try to map devices below the pci memory hole in case seabios didn't assign an address.
Of course this works only if the guest hasn't too much memory so there is some free space between end of ram and the start of the pci memory hole.
IO port memory is a problem too, BTW, and not that easily worked around. It's becoming rare in the non-virtualized systems but is unfortunately the mechanism of choice for virtualization.
We usually have a list of bootable devices we got from qemu - want to use that?
Why? seabios knows itself which devices it can use to boot. Also the list from qemu is incomplete, the boot menu can have more entries than what we get passed in from qemu as boot order list.
cheers, Gerd
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
-- Gleb.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/config.h | 14 -------------- src/pciinit.c | 2 +- 2 files changed, 1 insertions(+), 15 deletions(-)
diff --git a/src/config.h b/src/config.h index e6e07c9..687cac6 100644 --- a/src/config.h +++ b/src/config.h @@ -40,23 +40,9 @@ #define BUILD_BIOS_TMP_ADDR 0x30000 #define BUILD_MAX_HIGHMEM 0xe0000000
-// Support old pci mem assignment behaviour -//#define CONFIG_OLD_PCIMEM_ASSIGNMENT 1 -#if CONFIG_OLD_PCIMEM_ASSIGNMENT #define BUILD_PCIMEM_START 0xf0000000 #define BUILD_PCIMEM_SIZE (BUILD_PCIMEM_END - BUILD_PCIMEM_START) #define BUILD_PCIMEM_END 0xfec00000 /* IOAPIC is mapped at */ -#define BUILD_PCIPREFMEM_START 0 -#define BUILD_PCIPREFMEM_SIZE 0 -#define BUILD_PCIPREFMEM_END 0 -#else -#define BUILD_PCIMEM_START 0xf0000000 -#define BUILD_PCIMEM_SIZE 0x08000000 /* half- of pci window */ -#define BUILD_PCIMEM_END (BUILD_PCIMEM_START + BUILD_PCIMEM_SIZE) -#define BUILD_PCIPREFMEM_START BUILD_PCIMEM_END -#define BUILD_PCIPREFMEM_SIZE (BUILD_PCIPREFMEM_END - BUILD_PCIPREFMEM_START) -#define BUILD_PCIPREFMEM_END 0xfec00000 /* IOAPIC is mapped at */ -#endif
#define BUILD_APIC_ADDR 0xfee00000 #define BUILD_IOAPIC_ADDR 0xfec00000 diff --git a/src/pciinit.c b/src/pciinit.c index 8b95132..6aa09b3 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -623,7 +623,7 @@ pci_setup(void) dprintf(3, "pci setup\n");
u32 start = BUILD_PCIMEM_START; - u32 end = BUILD_IOAPIC_ADDR; + u32 end = BUILD_PCIMEM_END;
dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
... and make it match with the declarations in acpi-dsdt.dsl.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/config.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/src/config.h b/src/config.h index 687cac6..f2fce89 100644 --- a/src/config.h +++ b/src/config.h @@ -40,7 +40,7 @@ #define BUILD_BIOS_TMP_ADDR 0x30000 #define BUILD_MAX_HIGHMEM 0xe0000000
-#define BUILD_PCIMEM_START 0xf0000000 +#define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_SIZE (BUILD_PCIMEM_END - BUILD_PCIMEM_START) #define BUILD_PCIMEM_END 0xfec00000 /* IOAPIC is mapped at */