Hi,
This patch set switches seabios to a two-pass pci initialization. The first pass figures out the memory requirements, which are then used by the second pass to pack the regions.
I've split it into two patches to make it easier to review. The first patch does all the calculations and prints what it would do to the debug log. The second patch switches the new allocation code live.
The patches have been tested with the upcoming q35 emulation for qemu. They are based on the q35 seabios too, but rebasing to master shouldn't be hard.
Below you can see the allocations created by the patch.
Comments?
cheers, Gerd
========== /proc/iomem ============== 00000000-00000fff : reserved 00001000-0009e7ff : System RAM 0009e800-0009ffff : reserved 000c0000-000c8bff : Video ROM 000c9000-000c97ff : Adapter ROM 000f0000-000fffff : reserved 000f0000-000fffff : System ROM 00100000-3fffcfff : System RAM 01000000-014e5a17 : Kernel code 014e5a18-01ba42ef : Kernel data 01ce4000-01f474e3 : Kernel bss 3fffd000-3fffffff : reserved e0000000-efffffff : PCI MMCONFIG 0 [00-ff] e0000000-efffffff : reserved fa000000-fbffffff : 0000:00:02.0 fa000000-fbffffff : cirrusfb fc000000-fc3fffff : PCI Bus 0000:0c fc000000-fc0fffff : PCI Bus 0000:0d fc100000-fc1fffff : PCI Bus 0000:0e fc200000-fc2fffff : PCI Bus 0000:0f fc300000-fc3fffff : PCI Bus 0000:10 fc400000-fc4fffff : PCI Bus 0000:01 fc500000-fc5fffff : PCI Bus 0000:02 fc600000-fc6fffff : PCI Bus 0000:03 fc600000-fc6fffff : PCI Bus 0000:04 fc600000-fc6fffff : PCI Bus 0000:05 fc700000-fc7fffff : PCI Bus 0000:06 fc800000-fc8fffff : PCI Bus 0000:07 fc900000-fc9fffff : PCI Bus 0000:08 fca00000-fcafffff : PCI Bus 0000:09 fcb00000-fcbfffff : PCI Bus 0000:0a fcc00000-fccfffff : PCI Bus 0000:0b fdc00000-fdffffff : PCI Bus 0000:0c fdc00000-fdcfffff : PCI Bus 0000:0d fdd00000-fddfffff : PCI Bus 0000:0e fde00000-fdefffff : PCI Bus 0000:0f fdf00000-fdffffff : PCI Bus 0000:10 fe000000-fe0fffff : PCI Bus 0000:01 fe100000-fe1fffff : PCI Bus 0000:02 fe200000-fe2fffff : PCI Bus 0000:03 fe200000-fe2fffff : PCI Bus 0000:04 fe200000-fe2fffff : PCI Bus 0000:05 fe300000-fe3fffff : PCI Bus 0000:06 fe400000-fe4fffff : PCI Bus 0000:07 fe500000-fe5fffff : PCI Bus 0000:08 fe600000-fe6fffff : PCI Bus 0000:09 fe700000-fe7fffff : PCI Bus 0000:0a fe800000-fe8fffff : PCI Bus 0000:0b fe900000-fe90ffff : 0000:00:02.0 fe910000-fe91ffff : 0000:00:03.0 fe920000-fe920fff : 0000:00:02.0 fe920000-fe920fff : cirrusfb fe921000-fe921fff : 0000:00:03.0 fe921000-fe921fff : virtio-pci fe922000-fe922fff : 0000:00:1f.2 fe922000-fe922fff : ahci fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 
fee00000-fee00fff : Local APIC feffc000-feffffff : reserved fffc0000-ffffffff : reserved
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
TODO: actually use the calculated stuff.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pciinit.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 350 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 97df126..537eed0 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -17,12 +17,77 @@ #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12 + +#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+static struct pci_bus { + /* pci region stats */ + u32 io_count[16 - PCI_IO_INDEX_SHIFT]; + u32 mem_count[32 - PCI_MEM_INDEX_SHIFT]; + u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT]; + u32 io_sum, io_max; + u32 mem_sum, mem_max; + u32 prefmem_sum, prefmem_max; + /* seconday bus region sizes */ + u32 io_size, mem_size, prefmem_size; + /* pci region assignments */ + u32 io_bases[16 - PCI_IO_INDEX_SHIFT]; + u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT]; + u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT]; + u32 io_base, mem_base, prefmem_base; +} busses[32]; + +static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus); + +static int pci_size_to_index(u32 size, int shift) +{ + int index = 0; + + while (size > (1 << index)) { + index++; + } + if (index < shift) + index = shift; + index -= shift; + return index; +} + +static u32 pci_size_roundup(u32 size) +{ + int index = pci_size_to_index(size, 0); + return 1 << index; +} + +static int pci_io_size_to_index(u32 size) +{ + return pci_size_to_index(size, PCI_IO_INDEX_SHIFT); +} + +static u32 pci_io_index_to_size(int index) +{ + return 1 << (index + PCI_IO_INDEX_SHIFT); +} + +static int pci_mem_size_to_index(u32 size) +{ + return pci_size_to_index(size, PCI_MEM_INDEX_SHIFT); +} + +static u32 pci_mem_index_to_size(int index) +{ + return 1 << (index + PCI_MEM_INDEX_SHIFT); +} + /* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -411,17 +476,296 @@ static const struct pci_device_id pci_mem_addr_tbl[] = { PCI_DEVICE_END, };
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar, + u32 *val, u32 *size) +{ + u32 ofs = pci_bar(bdf, bar); + u32 old = pci_config_readl(bdf, ofs); + u32 mask; + + if (bar == PCI_ROM_SLOT) { + mask = PCI_ROM_ADDRESS_MASK; + pci_config_writel(bdf, ofs, mask); + } else { + if (old & PCI_BASE_ADDRESS_SPACE_IO) + mask = PCI_BASE_ADDRESS_IO_MASK; + else + mask = PCI_BASE_ADDRESS_MEM_MASK; + pci_config_writel(bdf, ofs, ~0); + } + *val = pci_config_readl(bdf, ofs); + pci_config_writel(bdf, ofs, old); + *size = (~(*val & mask)) + 1; +} + +static void pci_bios_bus_reserve(struct pci_bus *bus, u32 val, u32 size) +{ + u32 index; + + if (val & PCI_BASE_ADDRESS_SPACE_IO) { + index = pci_io_size_to_index(size); + size = pci_io_index_to_size(index); + bus->io_count[index]++; + bus->io_sum += size; + if (bus->io_max < size) + bus->io_max = size; + } else { + index = pci_mem_size_to_index(size); + size = pci_mem_index_to_size(index); + if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) { + bus->prefmem_count[index]++; + bus->prefmem_sum += size; + if (bus->prefmem_max < size) + bus->prefmem_max = size; + } else { + bus->mem_count[index]++; + bus->mem_sum += size; + if (bus->mem_max < size) + bus->mem_max = size; + } + } +} + +static u32 pci_bios_bus_get_addr(struct pci_bus *bus, u32 val, u32 size) +{ + u32 index, addr; + + if (val & PCI_BASE_ADDRESS_SPACE_IO) { + index = pci_io_size_to_index(size); + addr = bus->io_bases[index]; + bus->io_bases[index] += pci_io_index_to_size(index); + } else { + index = pci_mem_size_to_index(size); + if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) { + addr = bus->prefmem_bases[index]; + bus->prefmem_bases[index] += pci_mem_index_to_size(index); + } else { + addr = bus->mem_bases[index]; + bus->mem_bases[index] += pci_mem_index_to_size(index); + } + } + return addr; +} + +static void pci_bios_check_device(struct pci_bus *bus, u16 bdf) +{ + u16 class; + int i; + + class = pci_config_readw(bdf, PCI_CLASS_DEVICE); + if (class == 
PCI_CLASS_BRIDGE_PCI) { + u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS); + if (secbus >= ARRAY_SIZE(busses)) { + dprintf(1, "PCI: busses array too small, skipping bus %d\n", secbus); + return; + } + struct pci_bus *s = busses + secbus; + pci_bios_check_device_in_bus(secbus); + s->io_size = pci_size_roundup(s->io_sum); + s->mem_size = pci_size_roundup(s->mem_sum); + s->prefmem_size = pci_size_roundup(s->prefmem_sum); + if (s->io_size < PCI_BRIDGE_IO_MIN) { + s->io_size = PCI_BRIDGE_IO_MIN; + } + if (s->mem_size < PCI_BRIDGE_MEM_MIN) { + s->mem_size = PCI_BRIDGE_MEM_MIN; + } + if (s->prefmem_size < PCI_BRIDGE_MEM_MIN) { + s->prefmem_size = PCI_BRIDGE_MEM_MIN; + } + dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", + secbus, s->io_size, s->mem_size, s->prefmem_size); + pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size); + pci_bios_bus_reserve(bus, 0, s->mem_size); + pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size); + return; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + u32 val, size; + pci_bios_bus_get_bar(bus, bdf, i, &val, &size); + if (val == 0) { + continue; + } + pci_bios_bus_reserve(bus, val, size); + + if (!(val & PCI_BASE_ADDRESS_SPACE_IO) && + (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + i++; + } + } +} + +static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) +{ + u16 class; + int i; + + class = pci_config_readw(bdf, PCI_CLASS_DEVICE); + if (class == PCI_CLASS_BRIDGE_PCI) { + u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS); + if (secbus >= ARRAY_SIZE(busses)) { + return; + } + struct pci_bus *s = busses + secbus; + s->io_base = pci_bios_bus_get_addr + (bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size); + s->mem_base = pci_bios_bus_get_addr + (bus, 0, s->mem_size); + s->prefmem_base = pci_bios_bus_get_addr + (bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size); + dprintf(1, "PCI: init bases bus %d (secondary)\n", secbus); + pci_bios_init_bus_bases(s); 
+ /* TODO: commit assignments */ + pci_bios_map_device_in_bus(secbus); + return; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + u32 val, size, addr; + pci_bios_bus_get_bar(bus, bdf, i, &val, &size); + if (val == 0) { + continue; + } + + addr = pci_bios_bus_get_addr(bus, val, size); + dprintf(1, " bar %d, addr %x, size %x [%s]\n", + i, addr, size, + val & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem"); + /* TODO: commit assignments */ + + if (!(val & PCI_BASE_ADDRESS_SPACE_IO) && + (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + i++; + } + } +} + +static void pci_bios_check_device_in_bus(int bus) +{ + int bdf, max; + + dprintf(1, "PCI: check devices bus %d\n", bus); + foreachpci_in_bus(bdf, max, bus) { + pci_bios_check_device(&busses[bus], bdf); + } +} + +static void pci_bios_map_device_in_bus(int bus) +{ + int bdf, max; + + foreachpci_in_bus(bdf, max, bus) { + dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, bdf); + pci_bios_map_device(&busses[bus], bdf); + } +} + +static void pci_bios_init_bus_bases(struct pci_bus *bus) +{ + u32 base, newbase, size; + int i; + + /* assign prefetchable memory regions */ + dprintf(1, " prefmem max %x sum %x base %x\n", + bus->prefmem_max, bus->prefmem_sum, bus->prefmem_base); + base = bus->prefmem_base; + for (i = ARRAY_SIZE(bus->prefmem_count)-1; i >= 0; i--) { + size = pci_mem_index_to_size(i); + if (!bus->prefmem_count[i]) + continue; + newbase = base + size * bus->prefmem_count[i]; + dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", + size, bus->prefmem_count[i], base, newbase - 1); + bus->prefmem_bases[i] = base; + base = newbase; + } + + /* assign memory regions */ + dprintf(1, " mem max %x sum %x base %x\n", + bus->mem_max, bus->mem_sum, bus->mem_base); + base = bus->mem_base; + for (i = ARRAY_SIZE(bus->mem_count)-1; i >= 0; i--) { + size = pci_mem_index_to_size(i); + if (!bus->mem_count[i]) + continue; + newbase = base + size * bus->mem_count[i]; + dprintf(1, " mem size %8x: %d bar(s), %8x -> 
%8x\n", + size, bus->mem_count[i], base, newbase - 1); + bus->mem_bases[i] = base; + base = newbase; + } + + /* assign io regions */ + dprintf(1, " io max %x sum %x base %x\n", + bus->io_max, bus->io_sum, bus->io_base); + base = bus->io_base; + for (i = ARRAY_SIZE(bus->io_count)-1; i >= 0; i--) { + size = pci_io_index_to_size(i); + if (!bus->io_count[i]) + continue; + newbase = base + size * bus->io_count[i]; + dprintf(1, " io size %4x: %d bar(s), %4x -> %4x\n", + size, bus->io_count[i], base, newbase - 1); + bus->io_bases[i] = base; + base = newbase; + } +} + +static void pci_bios_init_root_regions(void) +{ + struct pci_bus *bus = &busses[0]; + u32 reserved = 0xffffffff - 0xfec00000 + 1; + + /* calculate memory windows */ + if (bus->mem_sum) { + u32 window = bus->mem_max; + while (bus->mem_sum + reserved > window) { + window += bus->mem_max; + } + bus->mem_base = 0xffffffff - window + 1; + reserved = 0xffffffff - bus->mem_base + 1; + } + + if (bus->prefmem_sum) { + u32 window = bus->prefmem_max; + while (bus->prefmem_sum + reserved > window) { + window += bus->prefmem_max; + } + bus->prefmem_base = 0xffffffff - window + 1; + reserved = 0xffffffff - bus->prefmem_base + 1; + } + + bus->io_base = 0xc000; + + /* simple sanity check */ + /* TODO: check e820 table */ + if (bus->mem_base < RamSize) { + dprintf(1, "PCI: out of space for memory bars\n"); + /* Hmm, what to do now? */ + } + + dprintf(1, "PCI: init bases bus 0 (primary)\n"); + pci_bios_init_bus_bases(bus); +} + void pci_setup(void) { + int bdf, max; + if (CONFIG_COREBOOT) // Already done by coreboot. return;
dprintf(3, "pci setup\n");
+ dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
+ dprintf(1, "=== PCI new allocation pass #1 ===\n"); + pci_bios_check_device_in_bus(0 /* host bus */); + pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); struct pci_mem_addr addr = { .pci_bios_mem_region = &pci_bios_mem_region, @@ -429,7 +773,11 @@ pci_setup(void) }; pci_find_init_device(pci_mem_addr_tbl, &addr);
- int bdf, max; + dprintf(1, "=== PCI new allocation pass #2 ===\n"); + pci_bios_init_root_regions(); + pci_bios_map_device_in_bus(0 /* host bus */); + + dprintf(1, "=== PCI old allocation pass ===\n"); foreachpci(bdf, max) { pci_init_device(pci_isa_bridge_tbl, bdf, NULL); }
On Tue, May 24, 2011 at 11:05:32AM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
TODO: actually use the calculated stuff.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
src/pciinit.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 350 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 97df126..537eed0 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -17,12 +17,77 @@ #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
+#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+static struct pci_bus {
- /* pci region stats */
- u32 io_count[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT];
16, 32 - are these numbers worth #defines? (The same applies below.)
I'm holding back any coding style comments (not sure they are worthwhile for an RFC).
- u32 io_sum, io_max;
- u32 mem_sum, mem_max;
- u32 prefmem_sum, prefmem_max;
- /* seconday bus region sizes */
- u32 io_size, mem_size, prefmem_size;
- /* pci region assignments */
- u32 io_bases[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_base, mem_base, prefmem_base;
+} busses[32];
+static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus);
+static int pci_size_to_index(u32 size, int shift) +{
- int index = 0;
- while (size > (1 << index)) {
index++;
- }
- if (index < shift)
index = shift;
- index -= shift;
- return index;
+}
+static u32 pci_size_roundup(u32 size) +{
- int index = pci_size_to_index(size, 0);
- return 1 << index;
+}
+static int pci_io_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_IO_INDEX_SHIFT);
+}
+static u32 pci_io_index_to_size(int index) +{
- return 1 << (index + PCI_IO_INDEX_SHIFT);
+}
+static int pci_mem_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_MEM_INDEX_SHIFT);
+}
+static u32 pci_mem_index_to_size(int index) +{
- return 1 << (index + PCI_MEM_INDEX_SHIFT);
+}
/* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -411,17 +476,296 @@ static const struct pci_device_id pci_mem_addr_tbl[] = { PCI_DEVICE_END, };
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar,
u32 *val, u32 *size)
+{
- u32 ofs = pci_bar(bdf, bar);
- u32 old = pci_config_readl(bdf, ofs);
- u32 mask;
- if (bar == PCI_ROM_SLOT) {
mask = PCI_ROM_ADDRESS_MASK;
pci_config_writel(bdf, ofs, mask);
- } else {
if (old & PCI_BASE_ADDRESS_SPACE_IO)
mask = PCI_BASE_ADDRESS_IO_MASK;
else
mask = PCI_BASE_ADDRESS_MEM_MASK;
pci_config_writel(bdf, ofs, ~0);
- }
- *val = pci_config_readl(bdf, ofs);
- pci_config_writel(bdf, ofs, old);
- *size = (~(*val & mask)) + 1;
+}
+static void pci_bios_bus_reserve(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
size = pci_io_index_to_size(index);
bus->io_count[index]++;
bus->io_sum += size;
if (bus->io_max < size)
bus->io_max = size;
- } else {
index = pci_mem_size_to_index(size);
size = pci_mem_index_to_size(index);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
bus->prefmem_count[index]++;
bus->prefmem_sum += size;
if (bus->prefmem_max < size)
bus->prefmem_max = size;
} else {
bus->mem_count[index]++;
bus->mem_sum += size;
if (bus->mem_max < size)
bus->mem_max = size;
}
- }
+}
+static u32 pci_bios_bus_get_addr(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index, addr;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
addr = bus->io_bases[index];
bus->io_bases[index] += pci_io_index_to_size(index);
- } else {
index = pci_mem_size_to_index(size);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
addr = bus->prefmem_bases[index];
bus->prefmem_bases[index] += pci_mem_index_to_size(index);
} else {
addr = bus->mem_bases[index];
bus->mem_bases[index] += pci_mem_index_to_size(index);
}
- }
- return addr;
+}
+static void pci_bios_check_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
dprintf(1, "PCI: busses array too small, skipping bus %d\n", secbus);
return;
}
struct pci_bus *s = busses + secbus;
pci_bios_check_device_in_bus(secbus);
s->io_size = pci_size_roundup(s->io_sum);
s->mem_size = pci_size_roundup(s->mem_sum);
s->prefmem_size = pci_size_roundup(s->prefmem_sum);
if (s->io_size < PCI_BRIDGE_IO_MIN) {
s->io_size = PCI_BRIDGE_IO_MIN;
}
if (s->mem_size < PCI_BRIDGE_MEM_MIN) {
s->mem_size = PCI_BRIDGE_MEM_MIN;
}
if (s->prefmem_size < PCI_BRIDGE_MEM_MIN) {
s->prefmem_size = PCI_BRIDGE_MEM_MIN;
}
dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n",
secbus, s->io_size, s->mem_size, s->prefmem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
pci_bios_bus_reserve(bus, 0, s->mem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, val, size);
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
return;
}
struct pci_bus *s = busses + secbus;
s->io_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
s->mem_base = pci_bios_bus_get_addr
(bus, 0, s->mem_size);
s->prefmem_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
dprintf(1, "PCI: init bases bus %d (secondary)\n", secbus);
pci_bios_init_bus_bases(s);
/* TODO: commit assignments */
pci_bios_map_device_in_bus(secbus);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size, addr;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
addr = pci_bios_bus_get_addr(bus, val, size);
dprintf(1, " bar %d, addr %x, size %x [%s]\n",
i, addr, size,
val & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem");
/* TODO: commit assignments */
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_check_device_in_bus(int bus) +{
- int bdf, max;
- dprintf(1, "PCI: check devices bus %d\n", bus);
- foreachpci_in_bus(bdf, max, bus) {
pci_bios_check_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_map_device_in_bus(int bus) +{
- int bdf, max;
- foreachpci_in_bus(bdf, max, bus) {
dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, bdf);
pci_bios_map_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_init_bus_bases(struct pci_bus *bus) +{
- u32 base, newbase, size;
- int i;
- /* assign prefetchable memory regions */
- dprintf(1, " prefmem max %x sum %x base %x\n",
bus->prefmem_max, bus->prefmem_sum, bus->prefmem_base);
- base = bus->prefmem_base;
- for (i = ARRAY_SIZE(bus->prefmem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->prefmem_count[i])
continue;
newbase = base + size * bus->prefmem_count[i];
dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n",
size, bus->prefmem_count[i], base, newbase - 1);
bus->prefmem_bases[i] = base;
base = newbase;
- }
- /* assign memory regions */
- dprintf(1, " mem max %x sum %x base %x\n",
bus->mem_max, bus->mem_sum, bus->mem_base);
- base = bus->mem_base;
- for (i = ARRAY_SIZE(bus->mem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->mem_count[i])
continue;
newbase = base + size * bus->mem_count[i];
dprintf(1, " mem size %8x: %d bar(s), %8x -> %8x\n",
size, bus->mem_count[i], base, newbase - 1);
bus->mem_bases[i] = base;
base = newbase;
- }
- /* assign io regions */
- dprintf(1, " io max %x sum %x base %x\n",
bus->io_max, bus->io_sum, bus->io_base);
- base = bus->io_base;
- for (i = ARRAY_SIZE(bus->io_count)-1; i >= 0; i--) {
size = pci_io_index_to_size(i);
if (!bus->io_count[i])
continue;
newbase = base + size * bus->io_count[i];
dprintf(1, " io size %4x: %d bar(s), %4x -> %4x\n",
size, bus->io_count[i], base, newbase - 1);
bus->io_bases[i] = base;
base = newbase;
- }
+}
+static void pci_bios_init_root_regions(void) +{
- struct pci_bus *bus = &busses[0];
- u32 reserved = 0xffffffff - 0xfec00000 + 1;
- /* calculate memory windows */
- if (bus->mem_sum) {
u32 window = bus->mem_max;
while (bus->mem_sum + reserved > window) {
window += bus->mem_max;
}
bus->mem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->mem_base + 1;
- }
- if (bus->prefmem_sum) {
u32 window = bus->prefmem_max;
while (bus->prefmem_sum + reserved > window) {
window += bus->prefmem_max;
}
bus->prefmem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->prefmem_base + 1;
- }
- bus->io_base = 0xc000;
- /* simple sanity check */
- /* TODO: check e820 table */
- if (bus->mem_base < RamSize) {
dprintf(1, "PCI: out of space for memory bars\n");
/* Hmm, what to do now? */
- }
- dprintf(1, "PCI: init bases bus 0 (primary)\n");
- pci_bios_init_bus_bases(bus);
+}
void pci_setup(void) {
int bdf, max;
if (CONFIG_COREBOOT) // Already done by coreboot. return;
dprintf(3, "pci setup\n");
dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
dprintf(1, "=== PCI new allocation pass #1 ===\n");
pci_bios_check_device_in_bus(0 /* host bus */);
pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); struct pci_mem_addr addr = { .pci_bios_mem_region = &pci_bios_mem_region,
@@ -429,7 +773,11 @@ pci_setup(void) }; pci_find_init_device(pci_mem_addr_tbl, &addr);
- int bdf, max;
- dprintf(1, "=== PCI new allocation pass #2 ===\n");
- pci_bios_init_root_regions();
- pci_bios_map_device_in_bus(0 /* host bus */);
- dprintf(1, "=== PCI old allocation pass ===\n"); foreachpci(bdf, max) { pci_init_device(pci_isa_bridge_tbl, bdf, NULL); }
-- 1.7.1
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
Basically it looks good. Great work. I've wanted to see this kind of patch. Some comments below.
On Tue, May 24, 2011 at 11:05:32AM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
TODO: actually use the calculated stuff.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
src/pciinit.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 350 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 97df126..537eed0 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -17,12 +17,77 @@ #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
+#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+static struct pci_bus {
- /* pci region stats */
- u32 io_count[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_sum, io_max;
- u32 mem_sum, mem_max;
- u32 prefmem_sum, prefmem_max;
- /* seconday bus region sizes */
- u32 io_size, mem_size, prefmem_size;
- /* pci region assignments */
- u32 io_bases[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_base, mem_base, prefmem_base;
+} busses[32];
Statically allocated? Should we go for three passes by splitting the first pass into two? - Pass 1.A: assign bus numbers and count the total number of pci buses, then allocate busses with malloc_tmp(), which will be freed later. - Pass 1.B: calculate the region sizes.
+static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus);
+static int pci_size_to_index(u32 size, int shift) +{
- int index = 0;
- while (size > (1 << index)) {
index++;
- }
- if (index < shift)
index = shift;
- index -= shift;
- return index;
+}
+static u32 pci_size_roundup(u32 size) +{
- int index = pci_size_to_index(size, 0);
- return 1 << index;
+}
+static int pci_io_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_IO_INDEX_SHIFT);
+}
+static u32 pci_io_index_to_size(int index) +{
- return 1 << (index + PCI_IO_INDEX_SHIFT);
+}
+static int pci_mem_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_MEM_INDEX_SHIFT);
+}
+static u32 pci_mem_index_to_size(int index) +{
- return 1 << (index + PCI_MEM_INDEX_SHIFT);
+}
/* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -411,17 +476,296 @@ static const struct pci_device_id pci_mem_addr_tbl[] = { PCI_DEVICE_END, };
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar,
u32 *val, u32 *size)
+{
- u32 ofs = pci_bar(bdf, bar);
- u32 old = pci_config_readl(bdf, ofs);
- u32 mask;
- if (bar == PCI_ROM_SLOT) {
mask = PCI_ROM_ADDRESS_MASK;
pci_config_writel(bdf, ofs, mask);
- } else {
if (old & PCI_BASE_ADDRESS_SPACE_IO)
mask = PCI_BASE_ADDRESS_IO_MASK;
else
mask = PCI_BASE_ADDRESS_MEM_MASK;
pci_config_writel(bdf, ofs, ~0);
- }
- *val = pci_config_readl(bdf, ofs);
- pci_config_writel(bdf, ofs, old);
- *size = (~(*val & mask)) + 1;
+}
+static void pci_bios_bus_reserve(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
size = pci_io_index_to_size(index);
bus->io_count[index]++;
bus->io_sum += size;
if (bus->io_max < size)
bus->io_max = size;
- } else {
index = pci_mem_size_to_index(size);
size = pci_mem_index_to_size(index);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
bus->prefmem_count[index]++;
bus->prefmem_sum += size;
if (bus->prefmem_max < size)
bus->prefmem_max = size;
} else {
bus->mem_count[index]++;
bus->mem_sum += size;
if (bus->mem_max < size)
bus->mem_max = size;
}
- }
+}
+static u32 pci_bios_bus_get_addr(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index, addr;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
addr = bus->io_bases[index];
bus->io_bases[index] += pci_io_index_to_size(index);
- } else {
index = pci_mem_size_to_index(size);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
addr = bus->prefmem_bases[index];
bus->prefmem_bases[index] += pci_mem_index_to_size(index);
} else {
addr = bus->mem_bases[index];
bus->mem_bases[index] += pci_mem_index_to_size(index);
}
- }
- return addr;
+}
+static void pci_bios_check_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
dprintf(1, "PCI: busses array too small, skipping bus %d\n", secbus);
return;
}
struct pci_bus *s = busses + secbus;
pci_bios_check_device_in_bus(secbus);
s->io_size = pci_size_roundup(s->io_sum);
s->mem_size = pci_size_roundup(s->mem_sum);
s->prefmem_size = pci_size_roundup(s->prefmem_sum);
if (s->io_size < PCI_BRIDGE_IO_MIN) {
s->io_size = PCI_BRIDGE_IO_MIN;
}
if (s->mem_size < PCI_BRIDGE_MEM_MIN) {
s->mem_size = PCI_BRIDGE_MEM_MIN;
}
if (s->prefmem_size < PCI_BRIDGE_MEM_MIN) {
s->prefmem_size = PCI_BRIDGE_MEM_MIN;
}
dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n",
secbus, s->io_size, s->mem_size, s->prefmem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
pci_bios_bus_reserve(bus, 0, s->mem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, val, size);
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
return;
}
struct pci_bus *s = busses + secbus;
s->io_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
s->mem_base = pci_bios_bus_get_addr
(bus, 0, s->mem_size);
s->prefmem_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
dprintf(1, "PCI: init bases bus %d (secondary)\n", secbus);
pci_bios_init_bus_bases(s);
/* TODO: commit assignments */
pci_bios_map_device_in_bus(secbus);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size, addr;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
addr = pci_bios_bus_get_addr(bus, val, size);
dprintf(1, " bar %d, addr %x, size %x [%s]\n",
i, addr, size,
val & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem");
/* TODO: commit assignments */
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_check_device_in_bus(int bus) +{
- int bdf, max;
- dprintf(1, "PCI: check devices bus %d\n", bus);
- foreachpci_in_bus(bdf, max, bus) {
pci_bios_check_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_map_device_in_bus(int bus) +{
- int bdf, max;
- foreachpci_in_bus(bdf, max, bus) {
dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, bdf);
pci_bios_map_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_init_bus_bases(struct pci_bus *bus) +{
- u32 base, newbase, size;
- int i;
- /* assign prefetchable memory regions */
- dprintf(1, " prefmem max %x sum %x base %x\n",
bus->prefmem_max, bus->prefmem_sum, bus->prefmem_base);
- base = bus->prefmem_base;
- for (i = ARRAY_SIZE(bus->prefmem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->prefmem_count[i])
continue;
newbase = base + size * bus->prefmem_count[i];
dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n",
size, bus->prefmem_count[i], base, newbase - 1);
bus->prefmem_bases[i] = base;
base = newbase;
- }
- /* assign memory regions */
- dprintf(1, " mem max %x sum %x base %x\n",
bus->mem_max, bus->mem_sum, bus->mem_base);
- base = bus->mem_base;
- for (i = ARRAY_SIZE(bus->mem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->mem_count[i])
continue;
newbase = base + size * bus->mem_count[i];
dprintf(1, " mem size %8x: %d bar(s), %8x -> %8x\n",
size, bus->mem_count[i], base, newbase - 1);
bus->mem_bases[i] = base;
base = newbase;
- }
- /* assign io regions */
- dprintf(1, " io max %x sum %x base %x\n",
bus->io_max, bus->io_sum, bus->io_base);
- base = bus->io_base;
- for (i = ARRAY_SIZE(bus->io_count)-1; i >= 0; i--) {
size = pci_io_index_to_size(i);
if (!bus->io_count[i])
continue;
newbase = base + size * bus->io_count[i];
dprintf(1, " io size %4x: %d bar(s), %4x -> %4x\n",
size, bus->io_count[i], base, newbase - 1);
bus->io_bases[i] = base;
base = newbase;
- }
+}
+static void pci_bios_init_root_regions(void) +{
- struct pci_bus *bus = &busses[0];
- u32 reserved = 0xffffffff - 0xfec00000 + 1;
- /* calculate memory windows */
- if (bus->mem_sum) {
u32 window = bus->mem_max;
while (bus->mem_sum + reserved > window) {
window += bus->mem_max;
}
bus->mem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->mem_base + 1;
- }
- if (bus->prefmem_sum) {
u32 window = bus->prefmem_max;
while (bus->prefmem_sum + reserved > window) {
window += bus->prefmem_max;
}
bus->prefmem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->prefmem_base + 1;
- }
- bus->io_base = 0xc000;
- /* simple sanity check */
- /* TODO: check e820 table */
- if (bus->mem_base < RamSize) {
dprintf(1, "PCI: out of space for memory bars\n");
/* Hmm, what to do now? */
- }
- dprintf(1, "PCI: init bases bus 0 (primary)\n");
- pci_bios_init_bus_bases(bus);
+}
void pci_setup(void) {
int bdf, max;
if (CONFIG_COREBOOT) // Already done by coreboot. return;
dprintf(3, "pci setup\n");
dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
dprintf(1, "=== PCI new allocation pass #1 ===\n");
pci_bios_check_device_in_bus(0 /* host bus */);
pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); struct pci_mem_addr addr = { .pci_bios_mem_region = &pci_bios_mem_region,
@@ -429,7 +773,11 @@ pci_setup(void) }; pci_find_init_device(pci_mem_addr_tbl, &addr);
- int bdf, max;
- dprintf(1, "=== PCI new allocation pass #2 ===\n");
- pci_bios_init_root_regions();
- pci_bios_map_device_in_bus(0 /* host bus */);
- dprintf(1, "=== PCI old allocation pass ===\n"); foreachpci(bdf, max) { pci_init_device(pci_isa_bridge_tbl, bdf, NULL); }
-- 1.7.1
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
The patch doesn't check any overflows. And it assumes that BAR assignment is always possible. However it isn't true. Some people complained about it before, so I added overflow check.
Some devices have very huge BAR like 1G, 2G... For example, qemu ivshmem or device-assignment of home-made device. Those huge BAR would be unallocated with warning. (Usually such huge BAR is 64bit, and we don't allocate 64bit BAR to 4G+ area at the moment.)
Another possibility is that a PCI bus can have many child buses and many devices. Some people have expressed that they want several hundred PCI slots. In this case, assign as many BARs as possible, and leave the remaining BARs unallocated with a warning.
The patch is an RFC, so you simply haven't addressed those yet, though.
thanks,
On Wed, May 25, 2011 at 11:34:06AM +0900, Isaku Yamahata wrote:
Basically it looks good. Great work. I've wanted to see this kind of patch. Some comments below.
On Tue, May 24, 2011 at 11:05:32AM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
TODO: actually use the calculated stuff.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com
src/pciinit.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 350 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 97df126..537eed0 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -17,12 +17,77 @@ #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7
-static void pci_bios_init_device_in_bus(int bus); +#define PCI_IO_INDEX_SHIFT 2 +#define PCI_MEM_INDEX_SHIFT 12
+#define PCI_BRIDGE_IO_MIN 0x100 +#define PCI_BRIDGE_MEM_MIN 0x100000
static struct pci_region pci_bios_io_region; static struct pci_region pci_bios_mem_region; static struct pci_region pci_bios_prefmem_region;
+static struct pci_bus {
- /* pci region stats */
- u32 io_count[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_sum, io_max;
- u32 mem_sum, mem_max;
- u32 prefmem_sum, prefmem_max;
- /* seconday bus region sizes */
- u32 io_size, mem_size, prefmem_size;
- /* pci region assignments */
- u32 io_bases[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_base, mem_base, prefmem_base;
+} busses[32];
Statically allocated? Should we go for three passes by splitting the first pass into two?
- 1.A pass: assign bus number and count the total number of pci buses.
allocate busses by malloc_tmp() which will be freed later.
- 1.B pass: calculate region size
+static void pci_bios_init_device_in_bus(int bus); +static void pci_bios_check_device_in_bus(int bus); +static void pci_bios_init_bus_bases(struct pci_bus *bus); +static void pci_bios_map_device_in_bus(int bus);
+static int pci_size_to_index(u32 size, int shift) +{
- int index = 0;
- while (size > (1 << index)) {
index++;
- }
- if (index < shift)
index = shift;
- index -= shift;
- return index;
+}
+static u32 pci_size_roundup(u32 size) +{
- int index = pci_size_to_index(size, 0);
- return 1 << index;
+}
+static int pci_io_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_IO_INDEX_SHIFT);
+}
+static u32 pci_io_index_to_size(int index) +{
- return 1 << (index + PCI_IO_INDEX_SHIFT);
+}
+static int pci_mem_size_to_index(u32 size) +{
- return pci_size_to_index(size, PCI_MEM_INDEX_SHIFT);
+}
+static u32 pci_mem_index_to_size(int index) +{
- return 1 << (index + PCI_MEM_INDEX_SHIFT);
+}
/* host irqs corresponding to PCI irqs A-D */ const u8 pci_irqs[4] = { 10, 10, 11, 11 @@ -411,17 +476,296 @@ static const struct pci_device_id pci_mem_addr_tbl[] = { PCI_DEVICE_END, };
+static void pci_bios_bus_get_bar(struct pci_bus *bus, int bdf, int bar,
u32 *val, u32 *size)
+{
- u32 ofs = pci_bar(bdf, bar);
- u32 old = pci_config_readl(bdf, ofs);
- u32 mask;
- if (bar == PCI_ROM_SLOT) {
mask = PCI_ROM_ADDRESS_MASK;
pci_config_writel(bdf, ofs, mask);
- } else {
if (old & PCI_BASE_ADDRESS_SPACE_IO)
mask = PCI_BASE_ADDRESS_IO_MASK;
else
mask = PCI_BASE_ADDRESS_MEM_MASK;
pci_config_writel(bdf, ofs, ~0);
- }
- *val = pci_config_readl(bdf, ofs);
- pci_config_writel(bdf, ofs, old);
- *size = (~(*val & mask)) + 1;
+}
+static void pci_bios_bus_reserve(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
size = pci_io_index_to_size(index);
bus->io_count[index]++;
bus->io_sum += size;
if (bus->io_max < size)
bus->io_max = size;
- } else {
index = pci_mem_size_to_index(size);
size = pci_mem_index_to_size(index);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
bus->prefmem_count[index]++;
bus->prefmem_sum += size;
if (bus->prefmem_max < size)
bus->prefmem_max = size;
} else {
bus->mem_count[index]++;
bus->mem_sum += size;
if (bus->mem_max < size)
bus->mem_max = size;
}
- }
+}
+static u32 pci_bios_bus_get_addr(struct pci_bus *bus, u32 val, u32 size) +{
- u32 index, addr;
- if (val & PCI_BASE_ADDRESS_SPACE_IO) {
index = pci_io_size_to_index(size);
addr = bus->io_bases[index];
bus->io_bases[index] += pci_io_index_to_size(index);
- } else {
index = pci_mem_size_to_index(size);
if (val & PCI_BASE_ADDRESS_MEM_PREFETCH) {
addr = bus->prefmem_bases[index];
bus->prefmem_bases[index] += pci_mem_index_to_size(index);
} else {
addr = bus->mem_bases[index];
bus->mem_bases[index] += pci_mem_index_to_size(index);
}
- }
- return addr;
+}
+static void pci_bios_check_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
dprintf(1, "PCI: busses array too small, skipping bus %d\n", secbus);
return;
}
struct pci_bus *s = busses + secbus;
pci_bios_check_device_in_bus(secbus);
s->io_size = pci_size_roundup(s->io_sum);
s->mem_size = pci_size_roundup(s->mem_sum);
s->prefmem_size = pci_size_roundup(s->prefmem_sum);
if (s->io_size < PCI_BRIDGE_IO_MIN) {
s->io_size = PCI_BRIDGE_IO_MIN;
}
if (s->mem_size < PCI_BRIDGE_MEM_MIN) {
s->mem_size = PCI_BRIDGE_MEM_MIN;
}
if (s->prefmem_size < PCI_BRIDGE_MEM_MIN) {
s->prefmem_size = PCI_BRIDGE_MEM_MIN;
}
dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n",
secbus, s->io_size, s->mem_size, s->prefmem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
pci_bios_bus_reserve(bus, 0, s->mem_size);
pci_bios_bus_reserve(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, val, size);
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) +{
- u16 class;
- int i;
- class = pci_config_readw(bdf, PCI_CLASS_DEVICE);
- if (class == PCI_CLASS_BRIDGE_PCI) {
u8 secbus = pci_config_readb(bdf, PCI_SECONDARY_BUS);
if (secbus >= ARRAY_SIZE(busses)) {
return;
}
struct pci_bus *s = busses + secbus;
s->io_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size);
s->mem_base = pci_bios_bus_get_addr
(bus, 0, s->mem_size);
s->prefmem_base = pci_bios_bus_get_addr
(bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size);
dprintf(1, "PCI: init bases bus %d (secondary)\n", secbus);
pci_bios_init_bus_bases(s);
/* TODO: commit assignments */
pci_bios_map_device_in_bus(secbus);
return;
- }
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size, addr;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
addr = pci_bios_bus_get_addr(bus, val, size);
dprintf(1, " bar %d, addr %x, size %x [%s]\n",
i, addr, size,
val & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem");
/* TODO: commit assignments */
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) &&
(val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) {
i++;
}
- }
+}
+static void pci_bios_check_device_in_bus(int bus) +{
- int bdf, max;
- dprintf(1, "PCI: check devices bus %d\n", bus);
- foreachpci_in_bus(bdf, max, bus) {
pci_bios_check_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_map_device_in_bus(int bus) +{
- int bdf, max;
- foreachpci_in_bus(bdf, max, bus) {
dprintf(1, "PCI: map device bus %d, bfd 0x%x\n", bus, bdf);
pci_bios_map_device(&busses[bus], bdf);
- }
+}
+static void pci_bios_init_bus_bases(struct pci_bus *bus) +{
- u32 base, newbase, size;
- int i;
- /* assign prefetchable memory regions */
- dprintf(1, " prefmem max %x sum %x base %x\n",
bus->prefmem_max, bus->prefmem_sum, bus->prefmem_base);
- base = bus->prefmem_base;
- for (i = ARRAY_SIZE(bus->prefmem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->prefmem_count[i])
continue;
newbase = base + size * bus->prefmem_count[i];
dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n",
size, bus->prefmem_count[i], base, newbase - 1);
bus->prefmem_bases[i] = base;
base = newbase;
- }
- /* assign memory regions */
- dprintf(1, " mem max %x sum %x base %x\n",
bus->mem_max, bus->mem_sum, bus->mem_base);
- base = bus->mem_base;
- for (i = ARRAY_SIZE(bus->mem_count)-1; i >= 0; i--) {
size = pci_mem_index_to_size(i);
if (!bus->mem_count[i])
continue;
newbase = base + size * bus->mem_count[i];
dprintf(1, " mem size %8x: %d bar(s), %8x -> %8x\n",
size, bus->mem_count[i], base, newbase - 1);
bus->mem_bases[i] = base;
base = newbase;
- }
- /* assign io regions */
- dprintf(1, " io max %x sum %x base %x\n",
bus->io_max, bus->io_sum, bus->io_base);
- base = bus->io_base;
- for (i = ARRAY_SIZE(bus->io_count)-1; i >= 0; i--) {
size = pci_io_index_to_size(i);
if (!bus->io_count[i])
continue;
newbase = base + size * bus->io_count[i];
dprintf(1, " io size %4x: %d bar(s), %4x -> %4x\n",
size, bus->io_count[i], base, newbase - 1);
bus->io_bases[i] = base;
base = newbase;
- }
+}
+static void pci_bios_init_root_regions(void) +{
- struct pci_bus *bus = &busses[0];
- u32 reserved = 0xffffffff - 0xfec00000 + 1;
- /* calculate memory windows */
- if (bus->mem_sum) {
u32 window = bus->mem_max;
while (bus->mem_sum + reserved > window) {
window += bus->mem_max;
}
bus->mem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->mem_base + 1;
- }
- if (bus->prefmem_sum) {
u32 window = bus->prefmem_max;
while (bus->prefmem_sum + reserved > window) {
window += bus->prefmem_max;
}
bus->prefmem_base = 0xffffffff - window + 1;
reserved = 0xffffffff - bus->prefmem_base + 1;
- }
- bus->io_base = 0xc000;
- /* simple sanity check */
- /* TODO: check e820 table */
- if (bus->mem_base < RamSize) {
dprintf(1, "PCI: out of space for memory bars\n");
/* Hmm, what to do now? */
- }
- dprintf(1, "PCI: init bases bus 0 (primary)\n");
- pci_bios_init_bus_bases(bus);
+}
void pci_setup(void) {
int bdf, max;
if (CONFIG_COREBOOT) // Already done by coreboot. return;
dprintf(3, "pci setup\n");
dprintf(1, "=== PCI bus & bridge init ===\n"); pci_bios_init_bus();
dprintf(1, "=== PCI new allocation pass #1 ===\n");
pci_bios_check_device_in_bus(0 /* host bus */);
pci_region_init(&pci_bios_io_region, 0xc000, 64 * 1024 - 1); struct pci_mem_addr addr = { .pci_bios_mem_region = &pci_bios_mem_region,
@@ -429,7 +773,11 @@ pci_setup(void) }; pci_find_init_device(pci_mem_addr_tbl, &addr);
- int bdf, max;
- dprintf(1, "=== PCI new allocation pass #2 ===\n");
- pci_bios_init_root_regions();
- pci_bios_map_device_in_bus(0 /* host bus */);
- dprintf(1, "=== PCI old allocation pass ===\n"); foreachpci(bdf, max) { pci_init_device(pci_isa_bridge_tbl, bdf, NULL); }
-- 1.7.1
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
-- yamahata
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
On 05/25/11 06:33, Isaku Yamahata wrote:
The patch doesn't check any overflows. And it assumes that BAR assignment is always possible. However it isn't true. Some people complained about it before, so I added overflow check.
Indeed, there is no error handling at all yet.
Some devices have very huge BAR like 1G, 2G... For example, qemu ivshmem or device-assignment of home-made device. Those huge BAR would be unallocated with warning. (Usually such huge BAR is 64bit, and we don't allocate 64bit BAR to 4G+ area at the moment.)
A number of possible improvements I can think of:
(1) Figure which devices are needed to boot. That list should include VGA, storage devices, NICs (with ROM), maybe all devices with a ROM. Anything else? (2) For devices which are not needed to boot we can: (a) Map 64bit bars above 4G. (b) Skip device altogether. (3) All bridges get some minimum amount of io and memory space. This is needed for bridges which support hotplug only.
Not sure it is worth trying to map 64bit bars above 4G as we can do that only for devices not needed to boot (otherwise we'll have trouble driving the device), and for those devices we can leave that job to the guest OS anyway ...
+static struct pci_bus { +} busses[32];
Statically allocated? Should we go for three passes by splitting the first pass into two?
Well, there actually is a bus enumeration pass already, so switching to dynamic allocation should be easy.
cheers, Gerd
Hi,
(1) Figure which devices are needed to boot. That list should include VGA, storage devices, NICs (with ROM), maybe all devices with a ROM. Anything else? (2) For devices which are not needed to boot we can: (b) Skip device altogether.
Something like this ...
cheers, Gerd
PS: full patch collection @ http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35
Hi Gerd,
On 25.05.2011 14:45, Gerd Hoffmann wrote:
PS: full patch collection @ http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35
I checked out your branch under Ubuntu Natty and ran "make". But unfortunately linking failed with the following error (see below).
root@test:/tmp/seabios-pci-two-pass# make Build Kconfig config file Compiling whole program out/ccode.16.s Compiling to assembler out/asm-offsets.s Generating offset file out/asm-offsets.h Compiling (16bit) out/code16.o Compiling whole program out/ccode32flat.o Compiling whole program out/code32seg.o Building ld scripts (version "pre-0.6.3-20110526_043255-test") Fixed space: 0xe05b-0x10000 total: 8101 slack: 2 Percent slack: 0.0% 16bit size: 39408 32bit segmented size: 1636 32bit flat size: 13292 32bit flat init size: 58352 Linking out/rom16.o out/romlayout16.lds:699 cannot move location counter backwards (from 000000000000ca0f to 000000000000c9f4) make: *** [out/rom16.o] Error 1
Natty comes with gcc-4.5.2 and binutils 2.21.0.
Regards André
Hi,
out/romlayout16.lds:699 cannot move location counter backwards (from 000000000000ca0f to 000000000000c9f4)
I've seen this on the list already, IIRC this is some binutils bug, check the archive.
cheers, Gerd
Hi Gerd, I managed to pass through a graphics card to a Windows7 VM using your kraxel.q35 seabios branch (http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35).
Here is my setup: Intel DX58SO Core i7 920 Radeon HD 6950 Kernel 2.6.35.7 qemu-kvm git pull from May 26th
The gfx card is working well on the desktop even with Aero turned on. I tried various 3D Games which worked good (some were a bit choppy ;-) though).
There were 2 games that made Windows crash. What is the way to debug what went wrong here?
One thing that is not working is the pass-through of a second device, a sound card in my case. As soon as I pass 2 devices to the VM, Windows does not boot anymore. If I continue and remove the second device from the command line the VM does not boot anymore with the passed through gfx card. The graphics card driver crashes Windows, with the error message of not being able to reset the graphics card. A reboot of the host cures this problem.
These are the command lines I used to start the VM:
echo "1002 6719" > /sys/bus/pci/drivers/pci-stub/new_id echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind echo 0000:03:00.0 > /sys/bus/pci/drivers/pci-stub/bind echo "1002 6719" > /sys/bus/pci/drivers/pci-stub/remove_id
qemu-system-x86_64 \ -boot order=cd -cpu host -vnc 192.168.3.1:0 -k de \ -smp 2,cores=2,sockets=1 \ -drive file=${IMAGE},if=ide,index=0,cache=writeback -m 4096 \ -drive file=/dev/sda3,if=ide,index=1,cache=none \ -drive file=/dev/sdb3,if=ide,index=2,cache=none \ -drive file=/dev/cdrom,if=ide,index=3,media=cdrom,cache=none \ -device pci-assign,host=03:00.0 \ -net nic,model=e1000,macaddr=DE:AD:BE:EF:42:42 \ -net tap,script=/usr/local/bin/qemu-ifup \ -usb -usbdevice host:045e:00f9 \ -monitor telnet:192.168.3.1:11111,server,nowait,nodelay
Thank you very much for your work.
Regards André
On 2011-05-27 07:32, André Weidemann wrote:
Hi Gerd, I managed to pass through a graphics card to a Windows7 VM using your kraxel.q35 seabios branch (http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35).
Here is my setup: Intel DX58SO Core i7 920 Radeon HD 6950 Kernel 2.6.35.7 qemu-kvm git pull from May 26th
The gfx card is working well on the desktop even with Aero turned on. I tried various 3D Games which worked good (some were a bit choppy ;-) though).
Cool!
There were 2 games that made Windows crash. What is the way to debug what went wrong here?
Likely that's very hard to analyze, specifically as pass-through may also widen pre-existing race windows that may reveal real bugs in the drivers. However, you could start with tracing the host kvm events around the crash using ftrace / trace-cmd. With a lot of luck, the traces may contain some suspicious access patterns.
One thing that is not working is the pass-through of a second device, a sound card in my case. As soon as I pass 2 devices to the VM, Windows does not boot anymore. If I continue and remove the second device from the command line the VM does not boot anymore with the passed through gfx card. The graphics card driver crashes Windows, with the error message of not being able to reset the graphics card. A reboot of the host cures this problem.
Does it make a difference when you preserve the function address by specifying -device pci-assign,host=03:00.0,addr=XX.0 and -device pci-assign,host=03:00.1,addr=XX.1 (where XX is some free slot number in the guest)?
These are the command lines I used to start the VM:
echo "1002 6719" > /sys/bus/pci/drivers/pci-stub/new_id echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind echo 0000:03:00.0 > /sys/bus/pci/drivers/pci-stub/bind echo "1002 6719" > /sys/bus/pci/drivers/pci-stub/remove_id
qemu-system-x86_64 \ -boot order=cd -cpu host -vnc 192.168.3.1:0 -k de \ -smp 2,cores=2,sockets=1 \ -drive file=${IMAGE},if=ide,index=0,cache=writeback -m 4096 \ -drive file=/dev/sda3,if=ide,index=1,cache=none \ -drive file=/dev/sdb3,if=ide,index=2,cache=none \ -drive file=/dev/cdrom,if=ide,index=3,media=cdrom,cache=none \ -device pci-assign,host=03:00.0 \ -net nic,model=e1000,macaddr=DE:AD:BE:EF:42:42 \ -net tap,script=/usr/local/bin/qemu-ifup \ -usb -usbdevice host:045e:00f9 \ -monitor telnet:192.168.3.1:11111,server,nowait,nodelay
Thank you very much for your work.
I've started to create a wiki page to save success stories like this one and collect the open issues, see http://www.linux-kvm.org/wiki/index.php?title=VGA_device_assignment. Everyone is welcome to fix, update, or otherwise enhance it.
Jan
Hi,
On 27.05.2011 13:09, Jan Kiszka wrote:
On 2011-05-27 07:32, André Weidemann wrote:
Here is my setup: Intel DX58SO Core i7 920 Radeon HD 6950 Kernel 2.6.35.7 qemu-kvm git pull from May 26th
One thing that is not working is the pass-through of a second device, a sound card in my case. As soon as I pass 2 devices to the VM, Windows does not boot anymore. If I continue and remove the second device from the command line the VM does not boot anymore with the passed through gfx card. The graphics card driver crashes Windows, with the error message of not being able to reset the graphics card. A reboot of the host cures this problem.
Does it make a difference when you preserve the function address by specifying -device pci-assign,host=03:00.0,addr=XX.0 and -device pci-assign,host=03:00.1,addr=XX.1 (where XX is some free slot number in the guest)?
I did some further testing in assigning both the VGA and the sound card. If I pass the devices to the VM like this:
-device pci-assign,host=03:00.0,addr=0x4 \ #VGA -device pci-assign,host=00:1b.0,addr=0x5 \ #Sound
Windows boots up, but the sound card is not visible as a device inside the VM.
Running "info pci" in the monitor shows this: Bus 0, device 4, function 0: VGA controller: PCI device 1002:6719 IRQ 11. BAR0: 32 bit prefetchable memory at 0xe0000000 [0xefffffff]. BAR2: 32 bit memory at 0xfeba0000 [0xfebbffff]. BAR4: I/O at 0xc000 [0xc0ff]. BAR6: 32 bit memory at 0xffffffffffffffff [0x0001fffe]. id "" Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xffffffffffffffff [0x00003ffe]. id ""
If I switch the order of the two devices to this:
-device pci-assign,host=00:1b.0,addr=0x4 \ #Sound -device pci-assign,host=03:00.0,addr=0x5 \ #VGA
I get the following error and kvm crashes:
It looks like the free PCI memory was not enough to assign the graphics card after assigning the sound card.
If I only assign the sound card, then "info pci" in the monitor looks like this: Bus 0, device 4, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xfebf0000 [0xfebf3fff]. id ""
The device is then visible inside the VM and working.
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
Regards André
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
Regards André
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot, but I could use the monitor to take a look at the PCI bar assignment. Even though the memory for the standard VGA card is freed, the soundcard does not seem to get the 16K bar it needs. "info pci" for the sound card still looks like this: Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xffffffffffffffff [0x00003ffe].
Does anyone have an idea why there was no bar assigned? Can the kernel be too old? (2.6.35.7.)
Just to test whether or not two devices can be assigned, I passed through 2 sound cards. (There is an onboard sound card and the Radeon has one too).
Each sound card gets its bar assigned as you can see: Bus 0, device 4, function 0: Audio controller: PCI device 1002:aa80 IRQ 10. BAR0: 32 bit memory at 0xfebf0000 [0xfebf3fff]. id "" Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xfebf4000 [0xfebf7fff].
but the sound cards do not show inside the Windows VM.
With both sound cards still passed to the VM I then booted an Ubuntu 10.10 image instead of Windows7. It got as far as starting gdm, but then the entire host and VM became very slow. The last message I saw on the terminal before gdm started was this: [ 23.030016 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x000f0000 [ 29.290017 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x200f0000
So it seems there is a problem of some kind when 2 devices are passed to a VM. Did anyone else ever notice anything similar?
Regards André
On 2011-05-26 23:19, André Weidemann wrote:
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot,
Where did it hang, ie. what IP was reported by info cpus?
but I could use the monitor to take a look at the PCI bar assignment. Even though the memory for the standard VGA card is freed, the soundcard does not seem to get the 16K bar it needs. "info pci" for the sound card still looks like this: Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xffffffffffffffff [0x00003ffe].
Does anyone have an idea why there was no bar assigned?
Maybe Gerd's patches aren't sufficient and you still need to change BUILD_MAX_HIGHMEM. See the hacks in http://git.kiszka.org/?p=seabios.git;a=shortlog;h=refs/heads/vga-assign, either replacing Gerd's patches or combined with them (I haven't checked if the latter makes sense).
Can the kernel be too old? (2.6.35.7.)
It would be good to check the latest kvm kernel to see if that oops is still present. In that case, please try to collect the backtrace via serial console, hopefully complete then. We may have a resource cleanup issue there.
Just to test whether or not two devices can be assigned, I passed through 2 sound cards. (There is an onboard sound card and the Radeon has one too).
Each sound card gets its bar assigned as you can see: Bus 0, device 4, function 0: Audio controller: PCI device 1002:aa80 IRQ 10. BAR0: 32 bit memory at 0xfebf0000 [0xfebf3fff]. id "" Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xfebf4000 [0xfebf7fff].
but the sound cards do not show inside the Windows VM.
With both sound cards still passed to the VM I then booted an Ubuntu 10.10 image instead of Windows7. It got as far as starting gdm, but then the entire host and VM became very slow. The last message I saw on the terminal before gdm started was this: [ 23.030016 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x000f0000 [ 29.290017 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x200f0000
Likely some IRQ issue. Please check if latest qemu-kvm.git + http://thread.gmane.org/gmane.comp.emulators.qemu/102540 makes any difference.
Jan
Hi,
On 28.05.2011 10:18, Jan Kiszka wrote:
On 2011-05-26 23:19, André Weidemann wrote:
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot,
Where did it hang, ie. what IP was reported by info cpus?
I added some debug options and found out, that the VM hangs when trying to initialize the graphics card ROM. See here: http://pastebin.com/S9a8uQfU
And some additional info here:
http://pastebin.com/AC4rw8Ek (info cpus/registers) http://pastebin.com/yYkn8jL2 (info pci)
but I could use the monitor to take a look at the PCI bar assignment. Even though the memory for the standard VGA card is freed, the soundcard does not seem to get the 16K bar it needs. "info pci" for the sound card still looks like this: Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xffffffffffffffff [0x00003ffe].
Does anyone have an idea why there was no bar assigned?
Maybe Gerd's patches aren't sufficient and you still need to change BUILD_MAX_HIGHMEM. See the hacks in http://git.kiszka.org/?p=seabios.git;a=shortlog;h=refs/heads/vga-assign, either replacing Gerd's patches or combined with them (I haven't checked if the latter makes sense).
I do not have access to the machine until tomorrow. I'm curious to see if extending the PCI memory window will cure the problem.
Can the kernel be too old? (2.6.35.7.)
It would be good to check the latest kvm kernel to see if that oops is still present. In that case, please try to collect the backtrace via serial console, hopefully complete then. We may have a resource cleanup issue there.
I will see if I can upgrade to the latest kernel tomorrow.
Just to test whether or not two devices can be assigned, I passed through 2 sound cards. (There is an onboard sound card and the Radeon has one too).
Each sound card gets its bar assigned as you can see: Bus 0, device 4, function 0: Audio controller: PCI device 1002:aa80 IRQ 10. BAR0: 32 bit memory at 0xfebf0000 [0xfebf3fff]. id "" Bus 0, device 5, function 0: Audio controller: PCI device 8086:3a3e IRQ 10. BAR0: 32 bit memory at 0xfebf4000 [0xfebf7fff].
but the sound cards do not show inside the Windows VM.
With both sound cards still passed to the VM I then booted an Ubuntu 10.10 image instead of Windows7. It got as far as starting gdm, but then the entire host and VM became very slow. The last message I saw on the terminal before gdm started was this: [ 23.030016 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x000f0000 [ 29.290017 ] hda_intel: azx_get_response timeout, switching to single_cmd mode: last cmd=0x200f0000
Likely some IRQ issue. Please check if latest qemu-kvm.git + http://thread.gmane.org/gmane.comp.emulators.qemu/102540 makes any difference.
See above comments. I will try this tomorrow.
Regards André
On 2010-05-28 20:49, André Weidemann wrote:
Hi,
On 28.05.2011 10:18, Jan Kiszka wrote:
On 2011-05-26 23:19, André Weidemann wrote:
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot,
Where did it hang, ie. what IP was reported by info cpus?
I added some debug options and found out, that the VM hangs when trying to initialize the graphics card ROM. See here: http://pastebin.com/S9a8uQfU
And some additional info here:
http://pastebin.com/AC4rw8Ek (info cpus/registers) http://pastebin.com/yYkn8jL2 (info pci)
Yeah, you definitely run out of PCI memory.
Plus you may suffer from the PAM/SMRAM bug I wrote about in the wiki. Try if this hack improves the situation:
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=96e600f43275310364c0310...
Jan
On 05/29/2011 06:22 PM, Jan Kiszka wrote:
Yeah, you definitely run out of PCI memory.
Plus you may suffer from the PAM/SMRAM bug I wrote about in the wiki. Try if this hack improves the situation:
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=96e600f43275310364c0310...
Hopefully the memory API will fix this. I'll try to push it forwards this week while Marcelo's minding kvm.git.
Hi,
On 29.05.2011 17:22, Jan Kiszka wrote:
On 2010-05-28 20:49, André Weidemann wrote:
Hi,
On 28.05.2011 10:18, Jan Kiszka wrote:
On 2011-05-26 23:19, André Weidemann wrote:
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot,
Where did it hang, ie. what IP was reported by info cpus?
I added some debug options and found out, that the VM hangs when trying to initialize the graphics card ROM. See here: http://pastebin.com/S9a8uQfU
And some additional info here:
http://pastebin.com/AC4rw8Ek (info cpus/registers) http://pastebin.com/yYkn8jL2 (info pci)
Yeah, you definitely run out of PCI memory.
Plus you may suffer from the PAM/SMRAM bug I wrote about in the wiki. Try if this hack improves the situation:
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=96e600f43275310364c0310...
I applied above patch, but it did not make a difference. The VM did not boot with the parameter "-vga none".
I found the time to apply your two patches here: http://git.kiszka.org/?p=seabios.git;a=shortlog;h=refs/heads/vga-assign After doing so, Gerd's q35 bios branch prints an error message during compile. The error comes from line 33 here: http://www.kraxel.org/cgit/seabios/tree/src/dev-q35.c?h=kraxel.q35 How do I adjust the memory area here?
Regards André
On 2011-05-29 21:30, André Weidemann wrote:
Hi,
On 29.05.2011 17:22, Jan Kiszka wrote:
On 2010-05-28 20:49, André Weidemann wrote:
Hi,
On 28.05.2011 10:18, Jan Kiszka wrote:
On 2011-05-26 23:19, André Weidemann wrote:
On 27.05.2011 21:50, André Weidemann wrote:
On 27.05.2011 21:40, André Weidemann wrote:
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios? Is there really not enough space to "squeeze" in those extra 16K?
I obviously forgot to add up the other memory that is used... 32MB go to the standard VGA card. Running qemu-kvm with "-vga none" did not work, so I left it in. And the e1000 NIC needs another 128K.
I'll see if I can get rid of the standard VGA card. I guess that should free enough memory for the sound card.
I did some more testing by starting the VM with the parameter "-vga none" and passed both the VGA card and the sound card to it. With this option the VM did not boot,
Where did it hang, ie. what IP was reported by info cpus?
I added some debug options and found out, that the VM hangs when trying to initialize the graphics card ROM. See here: http://pastebin.com/S9a8uQfU
And some additional info here:
http://pastebin.com/AC4rw8Ek (info cpus/registers) http://pastebin.com/yYkn8jL2 (info pci)
Yeah, you definitely run out of PCI memory.
Plus you may suffer from the PAM/SMRAM bug I wrote about in the wiki. Try if this hack improves the situation:
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=96e600f43275310364c0310...
I applied above patch, but it did not make a difference. The VM did not boot with the parameter "-vga none".
Err, yes, of course. You also need at least legacy VGA pass-through like in this commit:
http://git.kiszka.org/?p=qemu-kvm.git;a=commitdiff;h=5c14a1b3510d977f194b756...
But there is no guarantee that the BIOS you try to run through POST inside the guest is actually prepared for this. Maybe specifying an empty ROM helps to get the guest a bit further:
-device pci-assign,romfile=,host=...
I found the time to apply your two patches here: http://git.kiszka.org/?p=seabios.git;a=shortlog;h=refs/heads/vga-assign After doing so, Gerd's q35 bios branch prints an error message during compile. The error comes from line 33 here: http://www.kraxel.org/cgit/seabios/tree/src/dev-q35.c?h=kraxel.q35 How do I adjust the memory area here?
Just use my branch without any of Gerd's patches for the first step.
Jan
Hi,
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios?
Some comments on that (apply to the kraxel.q35 branch):
You can add this to the qemu command line to get the seabios debug output printed:
qemu $usualoptions -chardev stdio,id=seabios \ -device isa-debugcon,iobase=0x402,chardev=seabios
Then you can watch seabios assigning memory.
The ordering of the cards should not matter at all, the hacked bios does a two-pass scan to order the memory bars by size and pack them nicely.
Finding holes in address space for pci bars is not perfect yet. Right now it has sort-of hardcoded the 0xf0000000 -> 0xfec00000 range and tries to fit in stuff there. If it fails to fit in everything it tries again but leaves out devices which are not essential for boot. If it still fails the error isn't handled yet. For q35 this is a non-starter as it uses the e0000000 -> efffffff range for pci mmconfig. For the piix it doesn't hurt for now as the area above 0xe0000000 is unused. And the address space up to 0xe0000000 is used for guest ram only, thus it depends on the amount of memory you assign to the guest whether this area is free or not. If you assign -- say -- 2G to your guest everything above 0x80000000 is free.
With your setup (256m + 128m + small stuff) I'd expect seabios to place the 256m bar @ 0xe0000000, the 128m bar @ 0xf0000000 and all the small ones above 0xf8000000. It will map only the devices which it thinks are essential for boot (i.e. skip the sound card).
At the end of the day I want the pci address space allocation code have no hard-coded addresses in there but use the e820 table instead to figure how big the address space hole is. Maybe even use multiple holes (i.e. also use the memory below mmconfig @ 0xe0000000 with q35).
cheers, Gerd
Hi,
At the end of the day I want the pci address space allocation code have no hard-coded addresses in there but use the e820 table instead to figure how big the address space hole is. Maybe even use multiple holes (i.e. also use the memory below mmconfig @ 0xe0000000 with q35).
First cut hacked up & pushed to kraxel.q35
cheers, Gerd
On Mon, May 30, 2011 at 09:34:07AM +0200, Gerd Hoffmann wrote:
Hi,
If I am not mistaken then the graphics card needs 2 bars, one with 256MB and one with 128K. The sound card then needs 1 bar with 16K of PCI memory. How big is the PCI memory with seabios?
Some comments on that (apply to the kraxel.q35 branch):
You can add this to the qemu command line to get the seabios debug output printed:
qemu $usualoptions -chardev stdio,id=seabios \ -device isa-debugcon,iobase=0x402,chardev=seabios
Then you can watch seabios assigning memory.
The ordering of the cards should not matter at all, the hacked bios does a two-pass scan to order the memory bars by size and pack them nicely.
Finding holes in address space for pci bars is not perfect yet. Right now it has sort-of hardcoded the 0xf0000000 -> 0xfec00000 range and tries to fit in stuff there. If it fails to fit in everything it tries again but leaves out devices which are not essential for boot. If it still fails the error isn't handled yet. For q35 this is a non-starter as it uses the e0000000 -> efffffff range for pci mmconfig. For the piix it doesn't hurt for now as the area above 0xe0000000 is unused. And the address space up to 0xe0000000 is used for guest ram only, thus it depends on the amount of memory you assign to the guest whether this area is free or not. If you assign -- say -- 2G to your guest everything above 0x80000000 is free.
With your setup (256m + 128m + small stuff) I'd expect seabios to place the 256m bar @ 0xe0000000, the 128m bar @ 0xf0000000 and all the small ones above 0xf8000000. It will map only the devices which it thinks are essential for boot (i.e. skip the sound card).
At the end of the day I want the pci address space allocation code have no hard-coded addresses in there but use the e820 table instead to figure how big the address space hole is. Maybe even use multiple holes (i.e. also use the memory below mmconfig @ 0xe0000000 with q35).
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
thanks,
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
cheers, Gerd
On 01.06.2011, at 09:30, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
We're facing similar issues on PPC. The equivalent of the DSDT there is the device tree, which is currently passed in as binary blob and slightly appended for dynamic configuration. I'd much rather like to see it fully generated inside of Qemu from all the information we have available there, so we don't run into consistency issues.
This will be even more required when we pass through SoC devices to the guest, which are not on a PCI bus. Without specifying them in the DT, the guest doesn't know about them. X86 has a similar issue. Take a look at the HPET for example. If you don't want an HPET inside the guest, the DSDT needs to be modified. So you need to change things at 2 places - the DSDT and Qemu.
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Alex
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Well I have written for coreboot an ACPI bytecode generator (check acpigen.c). Maybe moving to SeaBIOS+Coreboot would make sense?
Thanks, Rudolf
Sorry I forgot to include all addresses.
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Well I have written for coreboot an ACPI bytecode generator (check acpigen.c). Maybe moving to SeaBIOS+Coreboot would make sense? It has the PCI resource allocator and all already done.
Thanks, Rudolf
On 06/01/11 12:20, Rudolf Marek wrote:
Sorry I forgot to include all addresses.
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Well I have written for coreboot an ACPI bytecode generator (check acpigen.c). Maybe moving to SeaBIOS+Coreboot would make sense? It has the PCI resource allocator and all already done.
Probably makes more sense to move to qemu, then pass the tables to seabios as qemu knows which virtual hardware is requested by the user and thus is present and which isn't.
Having a brief look at the coreboot code it seems static stuff (compiled by iasl) and dynamic bits are combined into the final dsdt table, is that correct?
thanks, Gerd
Having a brief look at the coreboot code it seems static stuff (compiled by iasl) and dynamic bits are combined into the final dsdt table, is that correct?
Yes the dsdt is static, it has just external references to ssdt which is dynamically generated using the acpigen.
Acpigen can generate the packages, names and sometimes even bits of methods.
Thanks Rudolf
On Wed, Jun 01, 2011 at 04:40:15PM +0200, Rudolf Marek wrote:
Having a brief look at the coreboot code it seems static stuff (compiled by iasl) and dynamic bits are combined into the final dsdt table, is that correct?
Yes the dsdt is static, it has just external references to ssdt which is dynamically generated using the acpigen.
Acpigen can generate the packages, names and sometimes even bits of methods.
That's interesting. SeaBIOS also has similar code - see acpi.c:build_ssdt().
-Kevin
Hi Folks,
Could anyone give me a 101 lesson on what is "two-pass PCI initialization"?
I tried some time ago to pass a LSI megaraid to a rhel5 guest, but the initialization of the pci device inside the VM fails (because, I think, it was already initialized in the host).
Just wondering could that be related? Has anyone tried passing a raid card to the guest?
Thanks
_Yushu
+-------------------------------------------------+ | Yushu Yao | Ph:1-510-486-4690 | | Lawrence Berkeley National Lab | Mailstop 50B-6222 | 1 Cyclotron Road | Berkeley CA 94720-8147 - USA +-------------------------------------------------+
On Wed, Jun 1, 2011 at 3:15 PM, Kevin O'Connor kevin@koconnor.net wrote:
On Wed, Jun 01, 2011 at 04:40:15PM +0200, Rudolf Marek wrote:
Having a brief look at the coreboot code it seems static stuff (compiled by iasl) and dynamic bits are combined into the final dsdt table, is that correct?
Yes the dsdt is static, it has just external references to ssdt which is dynamically generated using the acpigen.
Acpigen can generate the packages, names and sometimes even bits of methods.
That's interesting. SeaBIOS also has similar code - see acpi.c:build_ssdt().
-Kevin
To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 06/01/2011 12:56 PM, Alexander Graf wrote:
On 01.06.2011, at 09:30, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
We're facing similar issues on PPC. The equivalent of the DSDT there is the device tree, which is currently passed in as binary blob and slightly appended for dynamic configuration. I'd much rather like to see it fully generated inside of Qemu from all the information we have available there, so we don't run into consistency issues.
This will be even more required when we pass through SoC devices to the guest, which are not on a PCI bus. Without specifying them in the DT, the guest doesn't know about them. X86 has a similar issue. Take a look at the HPET for example. If you don't want an HPET inside the guest, the DSDT needs to be modified. So you need to change things at 2 places - the DSDT and Qemu.
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Generating the DSDT dynamically is hard, but the DSDT itself is dynamic. You can make any function talk to the firmware configuration interface and return results that depend on the information there.
On 01.06.2011, at 13:13, Avi Kivity wrote:
On 06/01/2011 12:56 PM, Alexander Graf wrote:
On 01.06.2011, at 09:30, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
We're facing similar issues on PPC. The equivalent of the DSDT there is the device tree, which is currently passed in as binary blob and slightly appended for dynamic configuration. I'd much rather like to see it fully generated inside of Qemu from all the information we have available there, so we don't run into consistency issues.
This will be even more required when we pass through SoC devices to the guest, which are not on a PCI bus. Without specifying them in the DT, the guest doesn't know about them. X86 has a similar issue. Take a look at the HPET for example. If you don't want an HPET inside the guest, the DSDT needs to be modified. So you need to change things at 2 places - the DSDT and Qemu.
I don't know how much work it would be to generate the DSDT dynamically from Qemu, but IMHO that's the sanest way to make things flexible. We could probably even extract most information from the Qdev tree.
Generating the DSDT dynamically is hard, but the DSDT itself is dynamic. You can make any function talk to the firmware configuration interface and return results that depend on the information there.
Does that hold true for nodes as well? I thought you can only use 'functions' for specific elements?
Alex
On 06/01/2011 02:16 PM, Alexander Graf wrote:
Generating the DSDT dynamically is hard, but the DSDT itself is dynamic. You can make any function talk to the firmware configuration interface and return results that depend on the information there.
Does that hold true for nodes as well? I thought you can only use 'functions' for specific elements?
I believe that if you declare _CRS as a method, it should work (there are both "Name" and "Method" _CRSs in the DSDT currently).
On Wed, Jun 01, 2011 at 09:30:12AM +0200, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
Right now what I can think of is. It would be possible to know the offset in AmlCode[] by compiling dsl with -l option. Then we can get the mix of source and resulted hex with offset like
iasl -l q35-acpi-dsdt.dsl.i => q35-acpi-dsdt.dsl.lst
192.... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 193.... 0x00000000, 194.... 0xE0000000, 195.... 0xFEBFFFFF, 196.... 0x00000000, 197.... 0x1EC00000, 198.... ,, , AddressRangeMemory, TypeStatic)
00000F74....87 17 00 00 0C 01 00 00 "........" 00000F7C....00 00 00 00 00 E0 FF FF "........" 00000F84....BF FE 00 00 00 00 00 00 "........" 00000F8C....C0 1E .................. ".." 00000F8E....79 00 .................. "y."
But this is very iasl specific and fragile...
On 2011-06-01 16:20, Isaku Yamahata wrote:
On Wed, Jun 01, 2011 at 09:30:12AM +0200, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
Right now what I can think of is. It would be possible to know the offset in AmlCode[] by compiling dsl with -l option. Then we can get the mix of source and resulted hex with offset like
iasl -l q35-acpi-dsdt.dsl.i => q35-acpi-dsdt.dsl.lst
192.... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 193.... 0x00000000, 194.... 0xE0000000, 195.... 0xFEBFFFFF, 196.... 0x00000000, 197.... 0x1EC00000, 198.... ,, , AddressRangeMemory, TypeStatic)
00000F74....87 17 00 00 0C 01 00 00 "........" 00000F7C....00 00 00 00 00 E0 FF FF "........" 00000F84....BF FE 00 00 00 00 00 00 "........" 00000F8C....C0 1E .................. ".." 00000F8E....79 00 .................. "y."
But this is very iasl specific and fragile...
Why make this particular thing dynamic? Physical PCs need to define a certain fixed PCI memory region as well and then live with it. IMHO, the current setting is just too small for normal use.
Jan
On Wed, 2011-06-01 at 16:31 +0200, Jan Kiszka wrote:
On 2011-06-01 16:20, Isaku Yamahata wrote:
On Wed, Jun 01, 2011 at 09:30:12AM +0200, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
Right now what I can think of is. It would be possible to know the offset in AmlCode[] by compiling dsl with -l option. Then we can get the mix of source and resulted hex with offset like
iasl -l q35-acpi-dsdt.dsl.i => q35-acpi-dsdt.dsl.lst
192.... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 193.... 0x00000000, 194.... 0xE0000000, 195.... 0xFEBFFFFF, 196.... 0x00000000, 197.... 0x1EC00000, 198.... ,, , AddressRangeMemory, TypeStatic)
00000F74....87 17 00 00 0C 01 00 00 "........" 00000F7C....00 00 00 00 00 E0 FF FF "........" 00000F84....BF FE 00 00 00 00 00 00 "........" 00000F8C....C0 1E .................. ".." 00000F8E....79 00 .................. "y."
But this is very iasl specific and fragile...
Why make this particular thing dynamic? Physical PCs need to define a certain fixed PCI memory region as well and then live with it. IMHO, the current setting is just too small for normal use.
I don't think that's true. It's possible a BIOS could walk the PCI bus, figure out how much MMIO space it needs to reserve, then configure the chipset to re-route the necessary memory and build the DSDT. Hardly anything is actually fixed within the chipset AIUI. Thanks,
Alex
On Wed, Jun 01, 2011 at 11:20:29PM +0900, Isaku Yamahata wrote:
On Wed, Jun 01, 2011 at 09:30:12AM +0200, Gerd Hoffmann wrote:
Hi,
0xE0000000 is hard-coded in the DSDT for both piix and q35 as below. If the range is determined dynamically, the area also needs to be updated somehow dynamically.
... Name (_CRS, ResourceTemplate () ... DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, 0x00000000, // Address Space Granularity 0xE0000000, // Address Range Minimum 0xFEBFFFFF, // Address Range Maximum 0x00000000, // Address Translation Offset 0x1EC00000, // Address Length ,, , AddressRangeMemory, TypeStatic)
Uhm, indeed. I know next to nothing about ACPI though. Ideas anyone how this could be done?
Right now what I can think of is. It would be possible to know the offset in AmlCode[] by compiling dsl with -l option. Then we can get the mix of source and resulted hex with offset like
It's easier than this - as Avi indicated, one can turn _CRS into a method which returns the current info with the PCI size filled in at runtime. Something like:
Method (_CRS, 0, NotSerialized) { Name (TMP, ResourceTemplate () { ... }) CreateDWordField (TMPM, 0x123, TMP) Store (TMPM, PCIM) Return (TMP) }
This is already done for other devices - see \SB.LNKS._CRS. For this to work, the new variable PCIM needs to be set to the size of the PCI region, which can be populated in the SSDT when it is built by SeaBIOS.
As Jan points out though, is a dynamic PCI region really needed? Those that need a large PCI region are also likely to need a large amount of memory. Maybe the space for PCI should just be increased.
-Kevin
Hi,
As Jan points out though, is a dynamic PCI region really needed? Those that need a large PCI region are also likely to need a large amount of memory. Maybe the space for PCI should just be increased.
Just changing it will not work as it will break live migration.
I think one option is to keep the piix-based pc as-is, with the address space above 0xe0000000 being available for PCI, and make the room larger for the upcoming q35-based pc, say starting at 0xc0000000. That will give almost 1G address space. 256 MB are used by pci mmconfig though.
The other option is to make it configurable, which in turn requires a dynamic PCI region.
cheers, Gerd
On 2011-06-06 08:30, Gerd Hoffmann wrote:
Hi,
As Jan points out though, is a dynamic PCI region really needed? Those that need a large PCI region are also likely to need a large amount of memory. Maybe the space for PCI should just be increased.
Just changing it will not work as it will break live migration.
Changing logic in the BIOS won't break migration (the active BIOS is included in the migration of RAM, current mappings are part of the device states). Changing the 4G mapping in qemu's hw/pc_piix.c would break it and needs to be coupled to the machine version.
Jan
On Thu, May 26, 2011 at 06:43:57AM +0200, André Weidemann wrote:
Hi Gerd,
On 25.05.2011 14:45, Gerd Hoffmann wrote:
PS: full patch collection @ http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35
I checked out your branch under Ubuntu Natty and ran "make". But unfortunately linking failed with the following error (see below).
root@test:/tmp/seabios-pci-two-pass# make Build Kconfig config file Compiling whole program out/ccode.16.s Compiling to assembler out/asm-offsets.s Generating offset file out/asm-offsets.h Compiling (16bit) out/code16.o Compiling whole program out/ccode32flat.o Compiling whole program out/code32seg.o Building ld scripts (version "pre-0.6.3-20110526_043255-test") Fixed space: 0xe05b-0x10000 total: 8101 slack: 2 Percent slack: 0.0% 16bit size: 39408 32bit segmented size: 1636 32bit flat size: 13292 32bit flat init size: 58352 Linking out/rom16.o out/romlayout16.lds:699 cannot move location counter backwards (from 000000000000ca0f to 000000000000c9f4) make: *** [out/rom16.o] Error 1
Natty comes with gcc-4.5.2 and binutils 2.21.0.
You need a newer binutils. 2.21.51.0.9 works fine.
Regards André
SeaBIOS mailing list SeaBIOS@seabios.org http://www.seabios.org/mailman/listinfo/seabios
On Tue, May 24, 2011 at 11:05:32AM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
Thanks Gerd. In general, I'm okay with this approach.
Some random comments..
+static struct pci_bus {
- /* pci region stats */
- u32 io_count[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_sum, io_max;
- u32 mem_sum, mem_max;
- u32 prefmem_sum, prefmem_max;
- /* secondary bus region sizes */
- u32 io_size, mem_size, prefmem_size;
- /* pci region assignments */
- u32 io_bases[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_base, mem_base, prefmem_base;
+} busses[32];
The size of the seabios rom grows with every static variable, so this should be dynamically allocated.
Speaking of dynamic allocation - it would be great if seabios had a "struct pcidevice" for every found device - then most cases of foreachpci() could instead just walk through this list of pci devices. The lack of this has already led to hacks like pci.c:pci_path_setup().
+static int pci_size_to_index(u32 size, int shift) +{
- int index = 0;
- while (size > (1 << index)) {
index++;
- }
util.h:__fls()
+static void pci_bios_check_device(struct pci_bus *bus, u16 bdf) +{
[...]
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i, &val, &size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, val, size);
If I read this correctly, the code reserves unique space for each ROM bar - this shouldn't be necessary as I believe it's safe to assume only one ROM will be mapped at a given time.
-Kevin
On 05/30/11 17:29, Kevin O'Connor wrote:
On Tue, May 24, 2011 at 11:05:32AM +0200, Gerd Hoffmann wrote:
This patch adds a second device scan to the pci initialization, which counts the memory bars of the various sizes and types. Then it calculates the sizes and the packing of the prefetchable and non-prefetchable pci memory windows and prints the results.
Thanks Gerd. In general, I'm okay with this approach.
Some random comments..
+static struct pci_bus {
- /* pci region stats */
- u32 io_count[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_count[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_sum, io_max;
- u32 mem_sum, mem_max;
- u32 prefmem_sum, prefmem_max;
- /* secondary bus region sizes */
- u32 io_size, mem_size, prefmem_size;
- /* pci region assignments */
- u32 io_bases[16 - PCI_IO_INDEX_SHIFT];
- u32 mem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 prefmem_bases[32 - PCI_MEM_INDEX_SHIFT];
- u32 io_base, mem_base, prefmem_base;
+} busses[32];
The size of the seabios rom grows with every static variable, so this should be dynamically allocated.
That has been fixed already. This and other changes can be found in http://www.kraxel.org/cgit/seabios/log/?h=kraxel.q35
The patches need some more work before they can be posted for re-review+merging though.
Speaking of dynamic allocation - it would be great if seabios had a "struct pcidevice" for every found device - then most cases of foreachpci() could instead just walk through this list of pci devices. The lack of this has already led to hacks like pci.c:pci_path_setup().
I'll have a look.
- for (i = 0; i< PCI_NUM_REGIONS; i++) {
u32 val, size;
pci_bios_bus_get_bar(bus, bdf, i,&val,&size);
if (val == 0) {
continue;
}
pci_bios_bus_reserve(bus, val, size);
If I read this correctly, the code reserves unique space for each ROM bar - this shouldn't be necessary as I believe it's safe to assume only one ROM will be mapped at a given time.
Given the small size roms usually have, I don't think this optimization is worth special-casing the rom bar.
cheers, Gerd
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- src/pciinit.c | 29 +++++++++++++++++++++++++++-- 1 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 537eed0..5ae141f 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -344,12 +344,14 @@ static const struct pci_device_id pci_class_tbl[] = { PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_init), PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_init),
+#if 0 /* PCI bridge */ PCI_DEVICE_CLASS(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, pci_bios_init_device_bridge),
/* default */ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID, pci_bios_allocate_regions), +#endif
PCI_DEVICE_END, }; @@ -608,6 +610,8 @@ static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) return; } struct pci_bus *s = busses + secbus; + u32 limit; + s->io_base = pci_bios_bus_get_addr (bus, PCI_BASE_ADDRESS_SPACE_IO, s->io_size); s->mem_base = pci_bios_bus_get_addr @@ -616,7 +620,25 @@ static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) (bus, PCI_BASE_ADDRESS_MEM_PREFETCH, s->prefmem_size); dprintf(1, "PCI: init bases bus %d (secondary)\n", secbus); pci_bios_init_bus_bases(s); - /* TODO: commit assignments */ + + limit = s->io_base + s->io_size - 1; + pci_config_writeb(bdf, PCI_IO_BASE, s->io_base >> PCI_IO_SHIFT); + pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); + pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT); + pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0); + + limit = s->mem_base + s->mem_size - 1; + pci_config_writew(bdf, PCI_MEMORY_BASE, s->mem_base >> PCI_MEMORY_SHIFT); + pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT); + + limit = s->prefmem_base + s->prefmem_size - 1; + pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, + s->prefmem_base >> PCI_PREF_MEMORY_SHIFT); + pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, + limit >> PCI_PREF_MEMORY_SHIFT); + pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0); + pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0); + pci_bios_map_device_in_bus(secbus); return; } @@ -632,7 +654,7 @@ static void pci_bios_map_device(struct pci_bus *bus, u16 bdf) dprintf(1, " bar %d, addr %x, size %x [%s]\n", i, addr, size, val & PCI_BASE_ADDRESS_SPACE_IO ? "io" : "mem"); - /* TODO: commit assignments */ + pci_set_io_region_addr(bdf, i, addr);
if (!(val & PCI_BASE_ADDRESS_SPACE_IO) && (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { @@ -777,9 +799,12 @@ pci_setup(void) pci_bios_init_root_regions(); pci_bios_map_device_in_bus(0 /* host bus */);
+#if 0 dprintf(1, "=== PCI old allocation pass ===\n"); foreachpci(bdf, max) { pci_init_device(pci_isa_bridge_tbl, bdf, NULL); } +#endif + pci_bios_init_device_in_bus(0 /* host bus */); }