Hi,
This patch series redesigns the existing pciinit.c code and introduces linked list operations. Changes are more about structure definitions than functionality. There are no more arrays of bases and counts in the new implementation. The new implementation is based on dynamic allocation of pci_region_entry structures. Each pci_region_entry structure could be a PCI bar or a downstream PCI region (bridge). Each entry has a set of attributes: type (IO, MEM, PREFMEM), is64bit, size, base address, PCI device owner, and a pointer to the pci_bus it belongs to.
1. Introduce List operations 2. Add pci_region_entry and linked lists operations in place while still using the array system to do the allocations. 3. Switch to lists operations 4. Get rid of size element from bus->r structure, now we use entry->size for the same purpose
src/pci.h | 5 - src/pciinit.c | 258 ++++++++++++++++++++++++++------------------------------- src/util.h | 21 +++++ 3 files changed, 137 insertions(+), 147 deletions(-)
This linked list implementation is partially based on kernel code. So it should be quite stable
Signed-off-by: Alexey Korolev alexey.korolev@endace.com --- src/util.h | 21 +++++++++++++++++++++ 1 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/src/util.h b/src/util.h index 70d3c4c..17df3cf 100644 --- a/src/util.h +++ b/src/util.h @@ -195,6 +195,27 @@ struct descloc_s { u32 addr; } PACKED;
+// Double linked lists with a single pointer list head +#define list_foreach_entry_safe(head, next, entry) \ + for (entry = head; entry && ({next=entry->next; 1;}); \ + entry = next) + +#define list_del(entry) \ + do { \ + *(entry)->pprev = (entry)->next; \ + if ((entry)->next) \ + (entry)->next->pprev = (entry)->pprev; \ + } while (0) + +#define list_add_head(phead, entry) \ + do { \ + (entry)->next = *(phead); \ + if (*(phead)) \ + (*(phead))->pprev = &(entry)->next; \ + *(phead) = entry; \ + (entry)->pprev = phead; \ + } while (0) + // util.c void cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); struct bregs;
On 03/28/12 06:28, Alexey Korolev wrote:
This linked list implementation is partially based on kernel code. So it should be quite stable
How about just copying the file?
I've used the linux kernel list implementation elsewhere too and it worked just fine with only minor tweaks (remove some likely()/unlikely() macros IIRC).
cheers, Gerd
This linked list implementation is partially based on kernel code. So it should be quite stable
How about just copying the file?
I've used the linux kernel list implementation elsewhere too and it worked just fine with only minor tweaks (remove some likely()/unlikely() macros IIRC).
Right, in "take-2" Kevin has suggested very similar. It will be done as a separate patch, because the changes are not just about pciinit.c, they also simplify pmm.c and stack.c a bit. In this patch I'm submitting only few necessary functions for list operations.
On Wed, Mar 28, 2012 at 04:39:07PM +0200, Gerd Hoffmann wrote:
On 03/28/12 06:28, Alexey Korolev wrote:
This linked list implementation is partially based on kernel code. So it should be quite stable
How about just copying the file?
I've used the linux kernel list implementation elsewhere too and it worked just fine with only minor tweaks (remove some likely()/unlikely() macros IIRC).
Linux is GPLv2 while SeaBIOS is LGPLv3, so some care should be taken.
-Kevin
On Wed, 2012-03-28 at 22:27 -0400, Kevin O'Connor wrote:
On Wed, Mar 28, 2012 at 04:39:07PM +0200, Gerd Hoffmann wrote:
On 03/28/12 06:28, Alexey Korolev wrote:
This linked list implementation is partially based on kernel code. So it should be quite stable
How about just copying the file?
I've used the linux kernel list implementation elsewhere too and it worked just fine with only minor tweaks (remove some likely()/unlikely() macros IIRC).
Linux is GPLv2 while SeaBIOS is LGPLv3, so some care should be taken.
FWIW In Xen we decided to go with the BSD list macros in our LGPL library interfaces to avoid a similar sort of issue. http://xenbits.xen.org/hg/xen-unstable.hg/file/74d2af0b56ea/tools/include/xe... has that plus a handy sed script to namespace the interface, although I don't suppose that will matter inside seabios.
Ian.
In this patch the pci_region_entry structure is introduced. The pci_device->bars are removed. The information from pci_region_entry is used to program pci bars.
Signed-off-by: Alexey Korolev alexey.korolev@endace.com --- src/pci.h | 5 -- src/pciinit.c | 116 ++++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/src/pci.h b/src/pci.h index a2a5a4c..5598100 100644 --- a/src/pci.h +++ b/src/pci.h @@ -51,11 +51,6 @@ struct pci_device { u8 prog_if, revision; u8 header_type; u8 secondary_bus; - struct { - u32 addr; - u32 size; - int is64; - } bars[PCI_NUM_REGIONS];
// Local information on device. int have_driver; diff --git a/src/pciinit.c b/src/pciinit.c index 9f3fdd4..6a285c9 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -31,6 +31,20 @@ static const char *region_type_name[] = { [ PCI_REGION_TYPE_PREFMEM ] = "prefmem", };
+struct pci_bus; +struct pci_region_entry { + struct pci_device *dev; + int bar; + u32 base; + u32 size; + int is64bit; + enum pci_region_type type; + struct pci_bus *this_bus; + struct pci_bus *parent_bus; + struct pci_region_entry *next; + struct pci_region_entry **pprev; +}; + struct pci_bus { struct { /* pci region stats */ @@ -41,6 +55,7 @@ struct pci_bus { /* pci region assignments */ u32 bases[32 - PCI_MEM_INDEX_SHIFT]; u32 base; + struct pci_region_entry *list; } r[PCI_REGION_TYPE_COUNT]; struct pci_device *bus_dev; }; @@ -352,6 +367,29 @@ pci_bios_get_bar(struct pci_device *pci, int bar, u32 *val, u32 *size) *size = (~(*val & mask)) + 1; }
+/**************************************************************** + * Build topology and calculate size of entries + ****************************************************************/ + +struct pci_region_entry * +pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev, + u32 size, int type, int is64bit) +{ + struct pci_region_entry *entry= malloc_tmp(sizeof(*entry)); + if (!entry) { + warn_noalloc(); + return NULL; + } + memset(entry, 0, sizeof(*entry)); + entry->dev = dev; + entry->type = type; + entry->is64bit = is64bit; + entry->size = size; + list_add_head(&bus->r[type].list, entry); + entry->parent_bus = bus; + return entry; +} + static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) { u32 index; @@ -364,9 +402,10 @@ static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) bus->r[type].max = size; }
-static void pci_bios_check_devices(struct pci_bus *busses) +static int pci_bios_check_devices(struct pci_bus *busses) { dprintf(1, "PCI: check devices\n"); + struct pci_region_entry *entry;
// Calculate resources needed for regular (non-bus) devices. struct pci_device *pci; @@ -382,15 +421,22 @@ static void pci_bios_check_devices(struct pci_bus *busses) pci_bios_get_bar(pci, i, &val, &size); if (val == 0) continue; - - pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); - pci->bars[i].addr = val; - pci->bars[i].size = size; - pci->bars[i].is64 = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && + int type = pci_addr_to_type(val); + int min_size = (type == PCI_REGION_TYPE_IO) ? + (1<<PCI_IO_INDEX_SHIFT) : (1<<PCI_MEM_INDEX_SHIFT); + size = (size > min_size) ? size : min_size; + int is64bit = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64);
- if (pci->bars[i].is64) + pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); + + entry = pci_region_create_entry(bus, pci, size, type, is64bit); + if (!entry) + return -1; + entry->bar = i; + + if (is64bit) i++; } } @@ -411,6 +457,11 @@ static void pci_bios_check_devices(struct pci_bus *busses) s->r[type].size = limit; s->r[type].size = pci_size_roundup(s->r[type].size); pci_bios_bus_reserve(parent, type, s->r[type].size); + entry = pci_region_create_entry(parent, s->bus_dev, + s->r[type].size, type, 0); + if (!entry) + return -1; + entry->this_bus = s; } dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", secondary_bus, @@ -418,6 +469,7 @@ static void pci_bios_check_devices(struct pci_bus *busses) s->r[PCI_REGION_TYPE_MEM].size, s->r[PCI_REGION_TYPE_PREFMEM].size); } + return 0; }
#define ROOT_BASE(top, sum, max) ALIGN_DOWN((top)-(sum),(max) ?: 1) @@ -526,28 +578,30 @@ static void pci_bios_map_devices(struct pci_bus *busses) }
// Map regions on each device. - struct pci_device *pci; - foreachpci(pci) { - if (pci->class == PCI_CLASS_BRIDGE_PCI) - continue; - u16 bdf = pci->bdf; - dprintf(1, "PCI: map device bdf=%02x:%02x.%x\n" - , pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf)); - struct pci_bus *bus = &busses[pci_bdf_to_bus(bdf)]; - int i; - for (i = 0; i < PCI_NUM_REGIONS; i++) { - if (pci->bars[i].addr == 0) - continue; - - int type = pci_addr_to_type(pci->bars[i].addr); - u32 addr = pci_bios_bus_get_addr(bus, type, pci->bars[i].size); - dprintf(1, " bar %d, addr %x, size %x [%s]\n", - i, addr, pci->bars[i].size, region_type_name[type]); - pci_set_io_region_addr(pci, i, addr); - - if (pci->bars[i].is64) { - i++; - pci_set_io_region_addr(pci, i, 0); + int bus; + struct pci_region_entry *entry, *next; + for (bus = 0; bus<=MaxPCIBus; bus++) { + int type; + for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { + list_foreach_entry_safe(busses[bus].r[type].list, + next, entry) { + if (!entry->this_bus) { + entry->base = pci_bios_bus_get_addr(&busses[bus], + entry->type, entry->size); + pci_set_io_region_addr(entry->dev, entry->bar, entry->base); + if (entry->is64bit) + pci_set_io_region_addr(entry->dev, entry->bar, 0); + + dprintf(1, "PCI: map device bdf=%02x:%02x.%x \tbar %d" + "\tsize\t0x%08x\tbase 0x%x type %s\n", + pci_bdf_to_bus(entry->dev->bdf), + pci_bdf_to_dev(entry->dev->bdf), + pci_bdf_to_fn(entry->dev->bdf), + entry->bar, entry->size, entry->base, + region_type_name[entry->type]); + } + list_del(entry); + free(entry); } } } @@ -588,7 +642,9 @@ pci_setup(void) return; } memset(busses, 0, sizeof(*busses) * (MaxPCIBus + 1)); - pci_bios_check_devices(busses); + if (pci_bios_check_devices(busses)) + return; + if (pci_bios_init_root_regions(&busses[0], start, end) != 0) { panic("PCI: out of address space\n"); }
In this patch, instead of the array-based resource allocation approach, we calculate resource addresses using linked lists of pci_region_entry structures.
Signed-off-by: Alexey Korolev alexey.korolev@endace.com --- src/pciinit.c | 179 ++++++++++++++++----------------------------------------- 1 files changed, 50 insertions(+), 129 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 6a285c9..85fe823 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -12,9 +12,8 @@ #include "pci_regs.h" // PCI_COMMAND #include "xen.h" // usingXen
-#define PCI_IO_INDEX_SHIFT 2 -#define PCI_MEM_INDEX_SHIFT 12 - +#define PCI_DEVICE_IO_MIN 0x4 +#define PCI_DEVICE_MEM_MIN 0x1000 #define PCI_BRIDGE_IO_MIN 0x1000 #define PCI_BRIDGE_MEM_MIN 0x100000
@@ -48,38 +47,14 @@ struct pci_region_entry { struct pci_bus { struct { /* pci region stats */ - u32 count[32 - PCI_MEM_INDEX_SHIFT]; u32 sum, max; - /* seconday bus region sizes */ u32 size; - /* pci region assignments */ - u32 bases[32 - PCI_MEM_INDEX_SHIFT]; u32 base; struct pci_region_entry *list; } r[PCI_REGION_TYPE_COUNT]; struct pci_device *bus_dev; };
-static int pci_size_to_index(u32 size, enum pci_region_type type) -{ - int index = __fls(size); - int shift = (type == PCI_REGION_TYPE_IO) ? - PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; - - if (index < shift) - index = shift; - index -= shift; - return index; -} - -static u32 pci_index_to_size(int index, enum pci_region_type type) -{ - int shift = (type == PCI_REGION_TYPE_IO) ? - PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; - - return 0x1 << (index + shift); -} - static enum pci_region_type pci_addr_to_type(u32 addr) { if (addr & PCI_BASE_ADDRESS_SPACE_IO) @@ -387,19 +362,11 @@ pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev, entry->size = size; list_add_head(&bus->r[type].list, entry); entry->parent_bus = bus; - return entry; -} - -static void pci_bios_bus_reserve(struct pci_bus *bus, int type, u32 size) -{ - u32 index;
- index = pci_size_to_index(size, type); - size = pci_index_to_size(index, type); - bus->r[type].count[index]++; bus->r[type].sum += size; if (bus->r[type].max < size) bus->r[type].max = size; + return entry; }
static int pci_bios_check_devices(struct pci_bus *busses) @@ -423,14 +390,12 @@ static int pci_bios_check_devices(struct pci_bus *busses) continue; int type = pci_addr_to_type(val); int min_size = (type == PCI_REGION_TYPE_IO) ? - (1<<PCI_IO_INDEX_SHIFT) : (1<<PCI_MEM_INDEX_SHIFT); + PCI_DEVICE_IO_MIN : PCI_DEVICE_MEM_MIN; size = (size > min_size) ? size : min_size; int is64bit = (!(val & PCI_BASE_ADDRESS_SPACE_IO) && (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64);
- pci_bios_bus_reserve(bus, pci_addr_to_type(val), size); - entry = pci_region_create_entry(bus, pci, size, type, is64bit); if (!entry) return -1; @@ -456,7 +421,6 @@ static int pci_bios_check_devices(struct pci_bus *busses) if (s->r[type].size < limit) s->r[type].size = limit; s->r[type].size = pci_size_roundup(s->r[type].size); - pci_bios_bus_reserve(parent, type, s->r[type].size); entry = pci_region_create_entry(parent, s->bus_dev, s->r[type].size, type, 0); if (!entry) @@ -496,112 +460,69 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end)
/**************************************************************** - * BAR assignment + * Map pci region entries ****************************************************************/
-static void pci_bios_init_bus_bases(struct pci_bus *bus) +static void pci_region_map_one_entry(struct pci_region_entry *entry) { - u32 base, newbase, size; - int type, i; - - for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type], - bus->r[type].max, bus->r[type].sum, bus->r[type].base); - base = bus->r[type].base; - for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) { - size = pci_index_to_size(i, type); - if (!bus->r[type].count[i]) - continue; - newbase = base + size * bus->r[type].count[i]; - dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", - size, bus->r[type].count[i], base, newbase - 1); - bus->r[type].bases[i] = base; - base = newbase; - } + if (!entry->this_bus ) { + dprintf(1, "PCI: bdf %d bar %d\tsize\t0x%08x\tbase 0x%x type %s\n", + entry->dev->bdf, entry->bar, entry->size, entry->base, + region_type_name[entry->type]); + + pci_set_io_region_addr(entry->dev, entry->bar, entry->base); + if (entry->is64bit) + pci_set_io_region_addr(entry->dev, entry->bar + 1, 0); + return; } -} - -static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) -{ - u32 index, addr; - - index = pci_size_to_index(size, type); - addr = bus->r[type].bases[index]; - bus->r[type].bases[index] += pci_index_to_size(index, type); - return addr; -}
-#define PCI_IO_SHIFT 8 -#define PCI_MEMORY_SHIFT 16 -#define PCI_PREF_MEMORY_SHIFT 16 + entry->this_bus->r[entry->type].base = entry->base; + dprintf(1, "PCI-2-PCI bus %d\t\tsize\t0x%08x\tbase 0x%x type %s\n", + entry->dev->secondary_bus, entry->size, entry->base, + region_type_name[entry->type]);
-static void pci_bios_map_devices(struct pci_bus *busses) -{ - // Setup bases for root bus. - dprintf(1, "PCI: init bases bus 0 (primary)\n"); - pci_bios_init_bus_bases(&busses[0]); - - // Map regions on each secondary bus. - int secondary_bus; - for (secondary_bus=1; secondary_bus<=MaxPCIBus; secondary_bus++) { - struct pci_bus *s = &busses[secondary_bus]; - if (!s->bus_dev) - continue; - u16 bdf = s->bus_dev->bdf; - struct pci_bus *parent = &busses[pci_bdf_to_bus(bdf)]; - int type; - for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - s->r[type].base = pci_bios_bus_get_addr( - parent, type, s->r[type].size); - } - dprintf(1, "PCI: init bases bus %d (secondary)\n", secondary_bus); - pci_bios_init_bus_bases(s); - - u32 base = s->r[PCI_REGION_TYPE_IO].base; - u32 limit = base + s->r[PCI_REGION_TYPE_IO].size - 1; - pci_config_writeb(bdf, PCI_IO_BASE, base >> PCI_IO_SHIFT); + u16 bdf = entry->dev->bdf; + u32 base = entry->base; + u32 limit = entry->base + entry->size - 1; + if (entry->type == PCI_REGION_TYPE_IO) { + pci_config_writeb(bdf, PCI_IO_BASE, base >> 8); pci_config_writew(bdf, PCI_IO_BASE_UPPER16, 0); - pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> PCI_IO_SHIFT); + pci_config_writeb(bdf, PCI_IO_LIMIT, limit >> 8); pci_config_writew(bdf, PCI_IO_LIMIT_UPPER16, 0); - - base = s->r[PCI_REGION_TYPE_MEM].base; - limit = base + s->r[PCI_REGION_TYPE_MEM].size - 1; - pci_config_writew(bdf, PCI_MEMORY_BASE, base >> PCI_MEMORY_SHIFT); - pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> PCI_MEMORY_SHIFT); - - base = s->r[PCI_REGION_TYPE_PREFMEM].base; - limit = base + s->r[PCI_REGION_TYPE_PREFMEM].size - 1; - pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, base >> PCI_PREF_MEMORY_SHIFT); - pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> PCI_PREF_MEMORY_SHIFT); + } + if (entry->type == PCI_REGION_TYPE_MEM) { + pci_config_writew(bdf, PCI_MEMORY_BASE, base >> 16); + pci_config_writew(bdf, PCI_MEMORY_LIMIT, limit >> 16); + } + if (entry->type == 
PCI_REGION_TYPE_PREFMEM) { + pci_config_writew(bdf, PCI_PREF_MEMORY_BASE, base >> 16); + pci_config_writew(bdf, PCI_PREF_MEMORY_LIMIT, limit >> 16); pci_config_writel(bdf, PCI_PREF_BASE_UPPER32, 0); pci_config_writel(bdf, PCI_PREF_LIMIT_UPPER32, 0); } + return; +}
- // Map regions on each device. - int bus; +static void pci_bios_map_devices(struct pci_bus *busses) +{ struct pci_region_entry *entry, *next; + + int bus; for (bus = 0; bus<=MaxPCIBus; bus++) { int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - list_foreach_entry_safe(busses[bus].r[type].list, - next, entry) { - if (!entry->this_bus) { - entry->base = pci_bios_bus_get_addr(&busses[bus], - entry->type, entry->size); - pci_set_io_region_addr(entry->dev, entry->bar, entry->base); - if (entry->is64bit) - pci_set_io_region_addr(entry->dev, entry->bar, 0); - - dprintf(1, "PCI: map device bdf=%02x:%02x.%x \tbar %d" - "\tsize\t0x%08x\tbase 0x%x type %s\n", - pci_bdf_to_bus(entry->dev->bdf), - pci_bdf_to_dev(entry->dev->bdf), - pci_bdf_to_fn(entry->dev->bdf), - entry->bar, entry->size, entry->base, - region_type_name[entry->type]); + u32 size; + for (size = busses[bus].r[type].max; size > 0; size >>= 1) { + list_foreach_entry_safe(busses[bus].r[type].list, + next, entry) { + if (size == entry->size) { + entry->base = busses[bus].r[type].base; + busses[bus].r[type].base += size; + pci_region_map_one_entry(entry); + list_del(entry); + free(entry); + } } - list_del(entry); - free(entry); } } }
On Wed, Mar 28, 2012 at 05:54:10PM +1300, Alexey Korolev wrote:
In this patch, instead of the array-based resource allocation approach, we calculate resource addresses using linked lists of pci_region_entry structures.
Thanks. I still think this migration can be done more seamlessly. I played with your patches a bit and came up with the attached patches that do just code movement - no algorithm changes. See the attached.
Also, I think we should look to commit after the next SeaBIOS release.
-Kevin
On Sun, Apr 01, 2012 at 03:28:34AM -0400, Kevin O'Connor wrote:
On Wed, Mar 28, 2012 at 05:54:10PM +1300, Alexey Korolev wrote:
In this patch, instead of the array-based resource allocation approach, we calculate resource addresses using linked lists of pci_region_entry structures.
Thanks. I still think this migration can be done more seamlessly. I played with your patches a bit and came up with the attached patches that do just code movement - no algorithm changes. See the attached.
Also, I think we should look to commit after the next SeaBIOS release.
Looking closer at your new allocation algorithm, I see that it effectively allocates largest sizes first. It occurred to me that an easy way to do this is to keep the pci_region_entry list sorted by size. See the patch below as an example (based on top of the previous email I sent).
-Kevin
diff --git a/src/pciinit.c b/src/pciinit.c index f2c839a..bfee178 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -41,16 +41,13 @@ struct pci_region_entry { struct pci_bus *child_bus; struct pci_bus *parent_bus; struct pci_region_entry *next; - struct pci_region_entry **pprev; };
struct pci_bus { struct { /* pci region stats */ - u32 count[32 - PCI_MEM_INDEX_SHIFT]; u32 sum, max; /* pci region assignments */ - u32 bases[32 - PCI_MEM_INDEX_SHIFT]; u32 base; struct pci_region_entry *list; } r[PCI_REGION_TYPE_COUNT]; @@ -379,11 +376,18 @@ pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev, entry->is64 = is64; entry->type = type; entry->parent_bus = bus; - list_add_head(&bus->r[type].list, entry); + // Insert into list in sorted order. + struct pci_region_entry **pprev; + for (pprev = &bus->r[type].list; *pprev; pprev = &(*pprev)->next) { + struct pci_region_entry *pos = *pprev; + if (pos->size < size) + break; + } + entry->next = *pprev; + *pprev = entry;
u32 index = pci_size_to_index(size, type); size = pci_index_to_size(index, type); - bus->r[type].count[index]++; bus->r[type].sum += size; if (bus->r[type].max < size) bus->r[type].max = size; @@ -478,46 +482,14 @@ static int pci_bios_init_root_regions(struct pci_bus *bus, u32 start, u32 end) * BAR assignment ****************************************************************/
-static void pci_bios_init_bus_bases(struct pci_bus *bus) -{ - u32 base, newbase, size; - int type, i; - - for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - dprintf(1, " type %s max %x sum %x base %x\n", region_type_name[type], - bus->r[type].max, bus->r[type].sum, bus->r[type].base); - base = bus->r[type].base; - for (i = ARRAY_SIZE(bus->r[type].count)-1; i >= 0; i--) { - size = pci_index_to_size(i, type); - if (!bus->r[type].count[i]) - continue; - newbase = base + size * bus->r[type].count[i]; - dprintf(1, " size %8x: %d bar(s), %8x -> %8x\n", - size, bus->r[type].count[i], base, newbase - 1); - bus->r[type].bases[i] = base; - base = newbase; - } - } -} - -static u32 pci_bios_bus_get_addr(struct pci_bus *bus, int type, u32 size) -{ - u32 index, addr; - - index = pci_size_to_index(size, type); - addr = bus->r[type].bases[index]; - bus->r[type].bases[index] += pci_index_to_size(index, type); - return addr; -} - #define PCI_IO_SHIFT 8 #define PCI_MEMORY_SHIFT 16 #define PCI_PREF_MEMORY_SHIFT 16
static void pci_region_map_one_entry(struct pci_region_entry *entry) { - u32 addr = pci_bios_bus_get_addr( - entry->parent_bus, entry->type, entry->size); + u32 addr = entry->parent_bus->r[entry->type].base; + entry->parent_bus->r[entry->type].base += entry->size; if (!entry->child_bus) { dprintf(1, "PCI: map device bdf=%02x:%02x.%x" " bar %d, addr %08x, size %08x [%s]\n", @@ -559,15 +531,14 @@ static void pci_bios_map_devices(struct pci_bus *busses) // Map regions on each device. int bus; for (bus = 0; bus<=MaxPCIBus; bus++) { - dprintf(1, "PCI: init bases bus %d\n", bus); - pci_bios_init_bus_bases(&busses[bus]); int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - struct pci_region_entry *entry, *next; - list_foreach_entry_safe(busses[bus].r[type].list, next, entry) { + struct pci_region_entry *entry = busses[bus].r[type].list; + while (entry) { pci_region_map_one_entry(entry); - list_del(entry); + struct pci_region_entry *next = entry->next; free(entry); + entry = next; } } }
Hi Kevin,
Thank you for the patches! I've created a diff of final version of your changes over mine, to make it clear what has changed.
Rather than including the complete diff, I've just left relevant parts and added comments.
--- a/src/pciinit.c +++ b/src/pciinit.c@@ -12,8 +12,9 @@ ........ @@ -34,25 +35,44 @@ struct pci_region_entry { struct pci_device *dev; int bar; - u32 base; u32 size; - int is64bit; + int is64; enum pci_region_type type; - struct pci_bus *this_bus; + struct pci_bus *child_bus;
Structure member naming was one of the difficult things when I was writing the code. The child_bus might be a bit confusing as people may think that it describes a child bus in the bus topology; in fact this element describes the bus this pci_region_entry is representing.
.......
+ +static int pci_size_to_index(u32 size, enum pci_region_type type) +{ + int index = __fls(size); + int shift = (type == PCI_REGION_TYPE_IO) ? + PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; + + if (index < shift) + index = shift; + index -= shift; + return index; +} + +static u32 pci_index_to_size(int index, enum pci_region_type type) +{ + int shift = (type == PCI_REGION_TYPE_IO) ? + PCI_IO_INDEX_SHIFT : PCI_MEM_INDEX_SHIFT; + + return 0x1 << (index + shift); +}
The only purpose to have these functions is to define the minimum size of pci BAR. They are used only once. What if we add size adjustment to pci_region_create_entry, or just create a function like pci_adjust_size(u32 size, enum pci_region_type type, int bridge)?
.........
- list_add_head(&bus->r[type].list, entry); entry->parent_bus = bus; - + // Insert into list in sorted order. + struct pci_region_entry **pprev; + for (pprev = &bus->r[type].list; *pprev; pprev = &(*pprev)->next) { + struct pci_region_entry *pos = *pprev; + if (pos->size < size) + break; + } + entry->next = *pprev; + *pprev = entry; + .......... static void pci_bios_map_devices(struct pci_bus *busses) { - struct pci_region_entry *entry, *next; - + // Map regions on each device. int bus; for (bus = 0; bus<=MaxPCIBus; bus++) { int type; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { - u32 size; - for (size = busses[bus].r[type].max; size > 0; size >>= 1) { - list_foreach_entry_safe(busses[bus].r[type].list, - next, entry) { - if (size == entry->size) { - entry->base = busses[bus].r[type].base; - busses[bus].r[type].base += size; - pci_region_map_one_entry(entry); - list_del(entry); - free(entry); - } - } + struct pci_region_entry *entry = busses[bus].r[type].list; + while (entry) { + pci_region_map_one_entry(entry); + struct pci_region_entry *next = entry->next; + free(entry); + entry = next; } } } Right, instead of sorting entries by size in pci_bios_map_devices, the entries are sorted when they are created. This makes the implementation simpler. Note: In case of 64bit BARs we have to migrate entries, so just sorting on create won't be enough, we should also add sorting when entries are migrated. This will add some more complexity, while in case of old implementation complexity will remain the same. I expect it shouldn't be that complicated, so any of these approaches are fine for me.
On Tue, Apr 03, 2012 at 06:39:22PM +1200, Alexey Korolev wrote:
Hi Kevin,
Thank you for the patches! I've created a diff of final version of your changes over mine, to make it clear what has changed.
Rather than including the complete diff, I've just left relevant parts and added comments.
Yes - there isn't much difference between your patches and my patches. I was really just playing with your patch.
Structure member naming was one of the difficult things when I was writing the code. The child_bus might be a bit confusing as people may think that it describes a child bus in the bus topology; in fact this element describes the bus this pci_region_entry is representing.
On Sunday, it occurred to me that we really don't need either parent_bus or this_bus.
+static int pci_size_to_index(u32 size, enum pci_region_type type)
[...]
The only purpose to have these functions is to define the minimum size of pci BAR. They are used only once. What if we add size adjustment to pci_region_create_entry, or just create a function like pci_adjust_size(u32 size, enum pci_region_type type, int bridge)?
Agreed - the only thing it does is force a minimum size for memory bars as you pointed out in a previous email.
As above, I did play with this a little more on Sunday. I also added in two patches from Gerd's series and made alignment handling more explicit. I'm including the series here if you're interested. Again, I think this should wait until after the 1.7.0 release.
-Kevin
On 04/04/12 15:31, Kevin O'Connor wrote:
Agreed - the only thing it does is force a minimum size for memory bars as you pointed out in a previous email. As above, I did play with this a little more on Sunday. I also added in two patches from Gerd's series and made alignment handling more explicit. I'm including the series here if you're interested. Again, I think this should wait until after the 1.7.0 release. -Kevin
Hi Kevin,
Sorry for late response, was quite busy. I've reviewed and tried your patches.
[Patches 1-4] are almost the same as I proposed in this series. The noticeable differences are: 1) instead of sorting entries at mapping stage your patches sort entries at allocation stage. (no difference in behavior or readability so any approach is fine for me) 2) instead of storing pointer to bus resource which the bridge device represents (this_bus), it is obtained from pci structure and array of "busses". - entry->this_bus->r[entry->type].base = entry->base; + struct pci_bus *child_bus = &busses[entry->dev->secondary_bus]; + child_bus->r[entry->type].base = addr; Since "entry->this_bus" member is removed the information about whether the resource is bridge region or PCI BAR is encoded inside "entry->bar". Value "-1" - means this is a bridge region, the positive values mean it is a BAR. IMHO 2) makes code less readable, at least a comment explaining the meaning of negative value of PCI BAR is required. I've found just cosmetic difference to my patches so please don't forget to add my sign-off-by to them.
[Patch 5] Track region alignment explicitly. Looks very good and safe. Should reduce address range wastes.
[Patch 6] Small patch to account resources of PCI bridges. Looks fine. May be instead of + num_regions = 2; I would add #define PCI_BRIDGE_NUM_REGIONS 2 ..... + num_regions = PCI_BRIDGE_NUM_REGIONS;
[Patch 7] 64bit aware code. Patch is incomplete. It is not working.
After 1.7.0 is fine.
On Thu, Apr 12, 2012 at 02:44:54PM +1200, Alexey Korolev wrote:
On 04/04/12 15:31, Kevin O'Connor wrote:
Agreed - the only thing it does is force a minimum size for memory bars as you pointed out in a previous email. As above, I did play with this a little more on Sunday. I also added in two patches from Gerd's series and made alignment handling more explicit. I'm including the series here if you're interested. Again, I think this should wait until after the 1.7.0 release. -Kevin
Hi Kevin,
Sorry for late response, was quite busy. I've reviewed and tried your patches.
[Patches 1-4] are almost the same as I proposed in this series.
Yes. It's your patches tweaked slightly.
The noticeable differences are:
1) instead of sorting entries at mapping stage your patches sort entries at allocation stage. (no difference in behavior or readability so
any approach is fine for me) 2) instead of storing pointer to bus resource which the bridge device represents (this_bus), it is obtained from pci structure and array of "busses".
entry->this_bus->r[entry->type].base = entry->base;
+ struct pci_bus *child_bus = &busses[entry->dev->secondary_bus];
+ child_bus->r[entry->type].base = addr;
Since "entry->this_bus" member is removed the information about whether the resource is bridge region or PCI BAR is encoded inside "entry->bar". Value "-1" - means this is a bridge region, the positive values mean it is a BAR. IMHO 2) makes code less readable, at least a comment explaining the meaning of negative value of PCI BAR is required. I've found just cosmetic difference to my patches so please don't forget to add my sign-off-by to them.
Okay - will do.
[Patch 5] Track region alignment explicitly. Looks very good and safe. Should reduce address range wastes.
[Patch 6] Small patch to account resources of PCI bridges. Looks fine. May be instead of
+ num_regions = 2;
I would add #define PCI_BRIDGE_NUM_REGIONS 2 .....
num_regions = PCI_BRIDGE_NUM_REGIONS;
This was me playing with some of Gerd's patches. I think it would require additional changes before committing. In particular, the patch misses the ROM bar on bridges - I think maybe it should be changed to keep the same loop constraints but add a "if (bar > PCI_BRIDGE_NUM_REGIONS && bar < PCI_ROM_SLOT) continue;".
[Patch 7] 64bit aware code. Patch is incomplete. It is not working.
This was also me playing with one of Gerd's patches. It just makes the bar read/write code 64bit aware. It doesn't actually program them. The logic to do real 64bit allocations would require list merging. Is this something you have looked at?
-Kevin
On 12/04/12 15:15, Kevin O'Connor wrote:
This was also me playing with one of Gerd's patches. It just makes the bar read/write code 64bit aware. It doesn't actually program them. The logic to do real 64bit allocations would require list merging. Is this something you have looked at?
Right. I see what you mean here. Shall I play around with 64bit support on top of these patches?
On Thu, Apr 12, 2012 at 03:28:02PM +1200, Alexey Korolev wrote:
On 12/04/12 15:15, Kevin O'Connor wrote:
This was also me playing with one of Gerd's patches. It just makes the bar read/write code 64bit aware. It doesn't actually program them. The logic to do real 64bit allocations would require list merging. Is this something you have looked at?
Right. I see what you mean here. Shall I play around with 64bit support on top of these patches?
If that makes sense, then sure.
-Kevin
On Thu, Apr 12, 2012 at 03:28:02PM +1200, Alexey Korolev wrote:
On 12/04/12 15:15, Kevin O'Connor wrote:
This was also me playing with one of Gerd's patches. It just makes the bar read/write code 64bit aware. It doesn't actually program them. The logic to do real 64bit allocations would require list merging. Is this something you have looked at?
Right. I see what you mean here. Shall I play around with 64bit support on top of these patches?
If that makes sense, then sure.
-Kevin
Hi Kevin,
Here is the whole series of patches including 64bit support. I don't send this in a different thread as this continues the previous discussion.
To save your time on parsing I've added little descriptions of changes comparing to patches you have sent.
[Patches 1-4] The same as previous patches just comment about bar=-1 is added
[Patch 5] Track-alignment-explicitly Almost the same as the previous, just changed priority from r->align to r->sum when setting start address of root regions.
I guess there are more chances to fit memory regions if we try place regions with higher r->sum like it was before. Consider default config #define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_END 0xfec00000
Imagine we have 1 pref. mem. region of 128MB, and many small memory regions which take up the rest of the available 492MB - 128MB. If we have alignment priority, the PCI pref memory region will start from 0xF0000000 and the PCI memory region will start from 0xe0000000 and will not fit.
If we choose size based priority it will fit.
[Patch 6] pciinit-bridges-can-have-two-regions-too Changed as we discussed. Now taking into account ROM regions too.
[Patch 7] Switch-to-64bit-variable-types.patch Same as previous
[Patch 8 ] New: The pci_region structure is added. Move setting of bus base address to pci_region_map_entries.
[Patch 9 ] New: Add discovery of whether a bridge region is 64bit capable.
[Patch 10] New: Migrate 64bit entries to 64bit pci regions if they do not fit in 32bit range. Pci region stats are now calculated. Added protection when total size of PCI resources is over 4GB.
[Patch 11] New: This patch solves issues on Windows guests, when 64bit BAR's are present. It is also helpful on Linux guests when use_crs kernel boot option is set.
On Thu, Apr 19, 2012 at 07:00:41PM +1200, Alexey Korolev wrote:
Here is the whole series of patches including 64bit support.
Thanks.
[...]
[Patch 5] Track-alignment-explicitly Almost the same as the previous, just changed priority from r->align to r->sum when setting start address of root regions.
I guess there are more chances to fit memory regions if we try place regions with higher r->sum like it was before. Consider default config #define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_END 0xfec00000
Imagine we have 1 pref. mem. region of 128MB, and many small memory regions which take up the rest of the available 492MB - 128MB. If we have alignment priority, the PCI pref memory region will start from 0xF0000000 and the PCI memory region will start from 0xe0000000 and will not fit.
The section with the lowest alignment would get allocated first and be at a higher address. So, in your example, the regular memory region would be assigned 0xe8000000 and then pref mem would be assigned 0xe0000000.
Your example is why alignment should be used instead of size. If size is used, then what you cite above occurs (pref mem has the lower size and is thus allocated first at the higher address).
[Patch 10] New: Migrate 64bit entries to 64bit pci regions if they do not fit in 32bit range. Pci region stats are now calculated. Added protection when total size of PCI resources is over 4GB.
Patches 1-9 and 11 look okay to me.
On patch 10, it would be preferable to separate the dynamic calculation of sum/alignment changes from the 64bit support. Otherwise, the core algorithm of patch 10 looks okay. Though it seems like the code is recalculating sum/alignment more than needed, and I think the list manipulation in pci_region_migrate_64bit_entries could be streamlined.
Thanks again, -Kevin
[...]
[Patch 5] Track-alignment-explicitly Almost the same as the previous, just changed priority from r->align to r->sum when setting start address of root regions.
I guess there are more chances to fit memory regions if we try place regions with higher r->sum like it was before. Consider default config #define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_END 0xfec00000
Imagine we have 1 pref. mem. region of 128MB, and many small memory regions which take up the rest of the available 492MB - 128MB. If we have alignment priority, the PCI pref memory region will start from 0xF0000000 and the PCI memory region will start from 0xe0000000 and will not fit.
The section with the lowest alignment would get allocated first and be at a higher address. So, in your example, the regular memory region would be assigned 0xe8000000 and then pref mem would be assigned 0xe0000000.
Your example is why alignment should be used instead of size. If size is used, then what you cite above occurs (pref mem has the lower size and is thus allocated first at the higher address).
Ahh the lowest alignment! In other words if we go from PCIMEM_START to PCIMEM_END we first assign address range for the region with the largest PCI resource size (highest alignment). I see. I will put it back then.
....
On patch 10, it would be preferable to separate the dynamic calculation of sum/alignment changes from the 64bit support. Otherwise, the core algorithm of patch 10 looks okay. Though it seems like the code is recalculating sum/alignment more than needed, and I think the list manipulation in pci_region_migrate_64bit_entries could be streamlined.
Right, sum/alignment recalculation is important for migration. Will split it and submit new series.
Thanks, Alexey
The 'size' element of the pci_bus->r structure is no longer needed, as the information about the bridge region size is already stored in the pci_region_entry structure.
Signed-off-by: Alexey Korolev alexey.korolev@endace.com --- src/pciinit.c | 21 +++++++++------------ 1 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/src/pciinit.c b/src/pciinit.c index 85fe823..f07153a 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -48,7 +48,6 @@ struct pci_bus { struct { /* pci region stats */ u32 sum, max; - u32 size; u32 base; struct pci_region_entry *list; } r[PCI_REGION_TYPE_COUNT]; @@ -414,24 +413,22 @@ static int pci_bios_check_devices(struct pci_bus *busses) continue; struct pci_bus *parent = &busses[pci_bdf_to_bus(s->bus_dev->bdf)]; int type; + u32 size; for (type = 0; type < PCI_REGION_TYPE_COUNT; type++) { u32 limit = (type == PCI_REGION_TYPE_IO) ? PCI_BRIDGE_IO_MIN : PCI_BRIDGE_MEM_MIN; - s->r[type].size = s->r[type].sum; - if (s->r[type].size < limit) - s->r[type].size = limit; - s->r[type].size = pci_size_roundup(s->r[type].size); - entry = pci_region_create_entry(parent, s->bus_dev, - s->r[type].size, type, 0); + size = s->r[type].sum; + if (size < limit) + size = limit; + size = pci_size_roundup(size); + entry = pci_region_create_entry(parent, s->bus_dev, size, type, 0); if (!entry) return -1; entry->this_bus = s; + dprintf(1, "PCI: secondary bus %d size 0x%x type %s\n", + entry->dev->secondary_bus, size, + region_type_name[entry->type]); } - dprintf(1, "PCI: secondary bus %d sizes: io %x, mem %x, prefmem %x\n", - secondary_bus, - s->r[PCI_REGION_TYPE_IO].size, - s->r[PCI_REGION_TYPE_MEM].size, - s->r[PCI_REGION_TYPE_PREFMEM].size); } return 0; }