Aaron Durbin (adurbin@google.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/5305
-gerrit
commit 38f4cbc170d1cb66743f3bab91a565bdd8cf1266
Author: Aaron Durbin <adurbin@chromium.org>
Date:   Tue Feb 25 20:36:56 2014 -0600
coreboot: add MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING option
Boot times can be improved by mirroring the payload into main memory before doing the actual loading. The systems that typically benefit are Intel ones whose SPI flash is memory-mapped. Without the SPI region being cached, every access to the payload during loading is an uncacheable access. Instead, take advantage of the on-board SPI controller, which has an internal cache and prefetcher, by copying 64-byte cachelines using 32-bit word copies.
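To make "copying 64-byte cachelines using 32-bit word copies" concrete, this is the access pattern being relied on (an illustrative sketch only, not part of the patch; the patch itself calls memcpy() and assumes it is implemented with 32-bit moves):

    #include <stdint.h>

    /* Copy one 64-byte cacheline as sixteen 32-bit words -- the
     * cacheline-aligned 32-bit access pattern the SPI controller's
     * prefetcher watches for. */
    static void copy_cacheline(uint32_t *dst, const uint32_t *src)
    {
            int i;

            for (i = 0; i < 16; i++)  /* 64 bytes / 4 bytes per word */
                    dst[i] = src[i];
    }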
Change-Id: I4aac856b1b5130fa2d68a6c45a96cfeead472a52
Signed-off-by: Aaron Durbin <adurbin@chromium.org>
---
 src/Kconfig                            | 10 +++++++
 src/lib/loaders/load_and_run_payload.c | 53 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
diff --git a/src/Kconfig b/src/Kconfig
index 4040705..da6960d 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -697,6 +697,16 @@ config LINUX_INITRD
 	help
 	  An initrd image to add to the Linux kernel.
 
+config MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING
+	bool "Copy payload contents to RAM before loading to the final destination."
+	default n
+	help
+	  On certain platforms a boot speed gain can be realized if the
+	  payload data stored in non-volatile storage is mirrored to RAM
+	  before loading. On x86 systems the payload would typically live
+	  in a memory-mapped SPI part. Copying the SPI contents to RAM
+	  before performing the load can speed up the boot process.
+
 endmenu
menu "Debugging" diff --git a/src/lib/loaders/load_and_run_payload.c b/src/lib/loaders/load_and_run_payload.c index 7e1383e..f3156b5 100644 --- a/src/lib/loaders/load_and_run_payload.c +++ b/src/lib/loaders/load_and_run_payload.c @@ -19,7 +19,9 @@
 #include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
 #include <console/console.h>
+#include <bootmem.h>
 #include <fallback.h>
 #include <lib.h>
 #include <payload_loader.h>
@@ -39,6 +41,53 @@ static struct payload global_payload = {
 	.name = CONFIG_CBFS_PREFIX "/payload",
 };
 
+static void mirror_payload(struct payload *payload)
+{
+	char *buffer;
+	size_t size;
+	char *src;
+	uintptr_t alignment_diff;
+	const unsigned long cacheline_size = 64;
+	const uintptr_t intra_cacheline_mask = cacheline_size - 1;
+	const uintptr_t cacheline_mask = ~intra_cacheline_mask;
+
+	src = payload->backing_store.data;
+	size = payload->backing_store.size;
+
+	/*
+	 * Adjust size so that the start and end points are aligned to a
+	 * cacheline. The SPI hardware controllers on Intel machines should
+	 * read and cache full-length cachelines as well as prefetch data.
+	 * Once the data is mirrored in memory all accesses should hit the
+	 * CPU's cache.
+	 */
+	alignment_diff = (intra_cacheline_mask & (uintptr_t)src);
+	size += alignment_diff;
+
+	size = ALIGN(size, cacheline_size);
+
+	printk(BIOS_DEBUG, "Payload aligned size: 0x%zx\n", size);
+
+	buffer = bootmem_allocate_buffer(size);
+
+	if (buffer == NULL) {
+		printk(BIOS_DEBUG, "No buffer for mirroring payload.\n");
+		return;
+	}
+
+	src = (void *)(cacheline_mask & (uintptr_t)src);
+
+	/*
+	 * Note that if memcpy is not using 32-bit moves the performance will
+	 * degrade because the SPI hardware prefetchers look for
+	 * cacheline-aligned 32-bit accesses to kick in.
+	 */
+	memcpy(buffer, src, size);
+
+	/* Update the payload's backing store. */
+	payload->backing_store.data = &buffer[alignment_diff];
+}
+
 struct payload *payload_load(void)
 {
 	int i;
@@ -62,6 +111,10 @@ struct payload *payload_load(void)
 	if (i == ARRAY_SIZE(payload_ops))
 		return NULL;
 
+	if (IS_ENABLED(CONFIG_MIRROR_PAYLOAD_TO_RAM_BEFORE_LOADING)) {
+		mirror_payload(payload);
+	}
+
 	entry = selfload(payload);
 
 	if (entry == NULL)
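
A worked example of the alignment arithmetic in mirror_payload(), as a standalone sketch (the payload address is made up, and ALIGN() here is assumed to round up to the next multiple, matching coreboot's macro):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed equivalent of coreboot's ALIGN(): round x up to a
     * multiple of a (a must be a power of two). */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

    int main(void)
    {
            const uintptr_t intra_cacheline_mask = 64 - 1;
            const uintptr_t cacheline_mask = ~intra_cacheline_mask;
            /* Hypothetical payload: 0x1000 bytes, starting 0x18 bytes
             * into a cacheline of the memory-mapped SPI window. */
            uintptr_t src = 0xff800018;
            size_t size = 0x1000;
            uintptr_t alignment_diff;

            alignment_diff = intra_cacheline_mask & src;  /* 0x18 */
            size += alignment_diff;                       /* 0x1018 */
            size = ALIGN(size, 64);                       /* 0x1040 */
            src = cacheline_mask & src;                   /* 0xff800000 */

            /* Prints: src=0xff800000 diff=0x18 size=0x1040 -- the copy
             * covers whole cachelines only. */
            printf("src=%#lx diff=%#lx size=%#zx\n",
                   (unsigned long)src, (unsigned long)alignment_diff, size);
            return 0;
    }

After the copy, the payload itself begins alignment_diff bytes into the mirrored buffer, which is why the patch updates backing_store.data to &buffer[alignment_diff]; selfload() then reads the payload entirely from cacheable RAM.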