Aaron Durbin (adurbin@google.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/5305
-gerrit
commit 19d1ec2f6af5c590a22e9a7467b0cb798e96bd52
Author: Aaron Durbin <adurbin@chromium.org>
Date:   Tue Feb 25 20:36:56 2014 -0600
coreboot: add MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING option
Boot times can be reduced by mirroring the payload into main memory before doing the actual loading. The systems that benefit are typically Intel ones whose SPI flash is memory-mapped. Without the SPI region being cached, every access to the payload during loading is an uncacheable access. Instead, take advantage of the on-board SPI controller, which has an internal cache and prefetcher, by copying 64-byte cachelines using 32-bit word copies.
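As background for that last point, a copy loop performing cacheline-aligned 32-bit accesses looks roughly like the following. This is an illustrative sketch only, not part of the patch; the patch itself simply calls memcpy and relies on it using 32-bit moves.

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative sketch: copy 'size' bytes using 32-bit loads and stores.
 * Both pointers and 'size' are assumed to be aligned to a 64-byte
 * cacheline, which is the access pattern the SPI controller's
 * prefetcher is said to key on.
 */
static void copy_cachelines_32bit(void *dst, const void *src, size_t size)
{
	uint32_t *d = dst;
	const uint32_t *s = src;
	size_t words = size / sizeof(uint32_t);

	while (words--)
		*d++ = *s++;
}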
Change-Id: I4aac856b1b5130fa2d68a6c45a96cfeead472a52
Signed-off-by: Aaron Durbin <adurbin@chromium.org>
---
 src/Kconfig                            | 10 +++++++
 src/lib/loaders/load_and_run_payload.c | 53 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
diff --git a/src/Kconfig b/src/Kconfig
index 4040705..da6960d 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -697,6 +697,16 @@ config LINUX_INITRD
 	help
 	  An initrd image to add to the Linux kernel.
 
+config MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING
+	bool "Copy payload contents to ram before loading to final destination."
+	default n
+	help
+	  On certain platforms a boot speed gain can be realized by mirroring
+	  the payload data stored in non-volatile storage. On x86 systems the
+	  payload would typically live in a memory-mapped SPI part. Copying
+	  the SPI contents to ram before performing the load can speed up
+	  the boot process.
+
 endmenu
menu "Debugging" diff --git a/src/lib/loaders/load_and_run_payload.c b/src/lib/loaders/load_and_run_payload.c index 7e1383e..f3156b5 100644 --- a/src/lib/loaders/load_and_run_payload.c +++ b/src/lib/loaders/load_and_run_payload.c @@ -19,7 +19,9 @@
 #include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
 #include <console/console.h>
+#include <bootmem.h>
 #include <fallback.h>
 #include <lib.h>
 #include <payload_loader.h>
@@ -39,6 +41,53 @@
 static struct payload global_payload = {
 	.name = CONFIG_CBFS_PREFIX "/payload",
 };
 
+static void mirror_payload(struct payload *payload)
+{
+	char *buffer;
+	size_t size;
+	char *src;
+	uintptr_t alignment_diff;
+	const unsigned long cacheline_size = 64;
+	const uintptr_t intra_cacheline_mask = cacheline_size - 1;
+	const uintptr_t cacheline_mask = ~intra_cacheline_mask;
+
+	src = payload->backing_store.data;
+	size = payload->backing_store.size;
+
+	/*
+	 * Adjust size so that the start and end points are aligned to a
+	 * cacheline. The SPI hardware controllers on Intel machines should
+	 * read and cache full-length cachelines as well as prefetch data.
+	 * Once the data is mirrored in memory all accesses should hit the
+	 * CPU's cache.
+	 */
+	alignment_diff = (intra_cacheline_mask & (uintptr_t)src);
+	size += alignment_diff;
+
+	size = ALIGN(size, cacheline_size);
+
+	printk(BIOS_DEBUG, "Payload aligned size: 0x%zx\n", size);
+
+	buffer = bootmem_allocate_buffer(size);
+
+	if (buffer == NULL) {
+		printk(BIOS_DEBUG, "No buffer for mirroring payload.\n");
+		return;
+	}
+
+	src = (void *)(cacheline_mask & (uintptr_t)src);
+
+	/*
+	 * Note that if memcpy is not using 32-bit moves the performance will
+	 * degrade because the SPI hardware prefetchers look for
+	 * cacheline-aligned 32-bit accesses to kick in.
+	 */
+	memcpy(buffer, src, size);
+
+	/* Update the payload's backing store. */
+	payload->backing_store.data = &buffer[alignment_diff];
+}
+
 struct payload *payload_load(void)
 {
 	int i;
@@ -62,6 +111,10 @@ struct payload *payload_load(void)
 	if (i == ARRAY_SIZE(payload_ops))
 		return NULL;
 
+	if (IS_ENABLED(CONFIG_MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING)) {
+		mirror_payload(payload);
+	}
+
 	entry = selfload(payload);
 
 	if (entry == NULL)
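To make the alignment arithmetic in mirror_payload() concrete, here is a standalone worked example with a hypothetical SPI-mapped address (the ALIGN macro here mirrors coreboot's round-up semantics; the addresses are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

int main(void)
{
	/* Hypothetical payload location in the memory-mapped SPI window. */
	uintptr_t src = 0xffe10234;
	size_t size = 0x1000;

	const uintptr_t intra_cacheline_mask = 64 - 1;
	const uintptr_t cacheline_mask = ~intra_cacheline_mask;

	/* 0x34 bytes separate src from the previous cacheline boundary. */
	uintptr_t alignment_diff = src & intra_cacheline_mask;

	/* Grow the copy to cover full cachelines on both ends. */
	size = ALIGN(size + alignment_diff, 64);
	src &= cacheline_mask;

	/* Prints: src 0xffe10200 size 0x1040 diff 0x34 */
	printf("src %#lx size %#zx diff %#lx\n",
	       (unsigned long)src, size, (unsigned long)alignment_diff);
	return 0;
}

The payload's backing_store.data is then pointed at buffer + alignment_diff, so later loading sees the same bytes at the same offsets, just backed by RAM instead of the SPI window.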