[coreboot-gerrit] Patch set updated for coreboot: soc/intel/apollolake: Add CQOS CAR implementation

Tue Aug 23 23:58:11 CEST 2016

Andrey Petrov (andrey.petrov at intel.com) just uploaded a new patch set to gerrit, which you can find at https://review.coreboot.org/15455

-gerrit

commit 6a510dc57048fdc148f14a507018f1d1576a6c4a
Author: Andrey Petrov <andrey.petrov at intel.com>
Date:   Mon Jun 27 13:39:34 2016 -0700

    soc/intel/apollolake: Add CQOS CAR implementation
    
    Add a new option to set up Cache-As-RAM (CAR) using CQOS, Cache Quality
    of Service. CQOS allows putting some cache ways in no-fill mode while
    keeping the other ways in regular evicting mode. This effectively allows
    using CAR and cache simultaneously.
    
    BUG=chrome-os-partner:51959
    TEST=switch from NEM to CQOS and back, boot
    
    Change-Id: Ic7f9899918f94a5788b02a4fbd2f5d5ba9aaf91d
    Signed-off-by: Andrey Petrov <andrey.petrov at intel.com>
---
 src/soc/intel/apollolake/Kconfig                  | 21 ++++++++
 src/soc/intel/apollolake/bootblock/cache_as_ram.S | 64 +++++++++++++++++++++++
 src/soc/intel/apollolake/exit_car.S               | 25 ++++++++-
 src/soc/intel/apollolake/include/soc/cpu.h        | 22 ++++++++
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/src/soc/intel/apollolake/Kconfig b/src/soc/intel/apollolake/Kconfig
index d8e33ca..9fa37a1 100644
--- a/src/soc/intel/apollolake/Kconfig
+++ b/src/soc/intel/apollolake/Kconfig
@@ -227,6 +227,27 @@ config NHLT_DA7219
 	default n
 	help
 	  Include DSP firmware settings for headset codec.
+choice
+	prompt "Cache-as-ram implementation"
+	default CAR_CQOS
+	help
+	  This option allows you to select how cache-as-ram (CAR) is set up.
+
+config CAR_NEM
+	bool "Non-evict mode"
+	help
+	  Traditionally, CAR is set up by using Non-Evict mode. This method
+	  does not allow CAR and cache to co-exist, because cache fills are
+	  blocked in NEM mode.
+
+config CAR_CQOS
+	bool "Cache Quality of Service"
+	help
+	  Cache Quality of Service allows more fine-grained control of cache
+	  usage. As a result, it is possible to set up a portion of the L2
+	  cache for CAR and use the remainder for actual caching.
+
+endchoice
 
 config SPI_FLASH_INCLUDE_ALL_DRIVERS
 	bool
diff --git a/src/soc/intel/apollolake/bootblock/cache_as_ram.S b/src/soc/intel/apollolake/bootblock/cache_as_ram.S
index e8fae28..65dd4c8 100644
--- a/src/soc/intel/apollolake/bootblock/cache_as_ram.S
+++ b/src/soc/intel/apollolake/bootblock/cache_as_ram.S
@@ -124,12 +124,61 @@ clear_var_mtrr:
 	invd
 	mov	%eax, %cr0
 
+#if IS_ENABLED(CONFIG_CAR_NEM)
 	/* Disable cache eviction (setup stage) */
 	mov	$MSR_EVICT_CTL, %ecx
 	rdmsr
 	or	$0x1, %eax
 	wrmsr
+#else
+	/*
+	 * Disable both L1 and L2 prefetchers. For a not-yet-understood reason,
+	 * prefetchers slow down filling cache with rep stos in CQOS mode.
+	 */
+	mov	$MSR_PREFETCH_CTL, %ecx
+	rdmsr
+	or	$(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
+	wrmsr
+#endif
+
+#if IS_ENABLED(CONFIG_CAR_CQOS)
+#if (CONFIG_DCACHE_RAM_SIZE == L2_CACHE_SIZE)
+/*
+ * If CAR size is set to the full L2 size, the inverted mask written to
+ * MSR_L2_QOS_MASK(1) is all-zeros. This is not supported by the CPU/uCode.
+ */
+#error "CQOS CAR may not use whole L2 cache area"
+#endif
+	/* Calculate how many mask bits are to be used for CAR */
+	xor	%edx, %edx
+	mov	$CONFIG_DCACHE_RAM_SIZE, %eax	/* dividend */
+	mov	$CACHE_QOS_SIZE_PER_BIT, %ecx	/* divisor */
+	div	%ecx				/* result is in eax */
+	mov	%eax, %ecx			/* save to ecx */
+	mov	$1, %ebx
+	shl	%cl, %ebx
+	sub	$1, %ebx			/* resulting mask is in ebx */
+
+	/* Set this mask for initial cache fill */
+	mov	$MSR_L2_QOS_MASK(0), %ecx
+	rdmsr
+	mov	%bl, %al
+	wrmsr
+
+	/* Set CLOS selector to 0 */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~IA32_PQR_ASSOC_MASK, %edx	/* select mask 0 */
+	wrmsr
 
+	/* We will need to block CAR region from evicts */
+	mov	$MSR_L2_QOS_MASK(1), %ecx
+	rdmsr
+	/* Invert bits that are to be used for cache */
+	mov	%bl, %al
+	xor	$~0, %al			/* invert 8 bits */
+	wrmsr
+#endif
 	post_code(0x26)
 
 	/* Clear the cache memory region. This will also fill up the cache */
@@ -140,11 +189,26 @@ clear_var_mtrr:
 
 	post_code(0x27)
 
+#if IS_ENABLED(CONFIG_CAR_NEM)
 	/* Disable cache eviction (run stage) */
 	mov	$MSR_EVICT_CTL, %ecx
 	rdmsr
 	or	$0x2, %eax
 	wrmsr
+#else
+	/* Cache is populated. Use mask 1 that will block evicts */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~IA32_PQR_ASSOC_MASK, %edx	/* clear index bits first */
+	or	$1, %edx			/* select mask 1 */
+	wrmsr
+
+	/* Enable prefetchers */
+	mov	$MSR_PREFETCH_CTL, %ecx
+	rdmsr
+	and	$~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
+	wrmsr
+#endif
 
 	post_code(0x28)
 
diff --git a/src/soc/intel/apollolake/exit_car.S b/src/soc/intel/apollolake/exit_car.S
index 339242b..e21aa29 100644
--- a/src/soc/intel/apollolake/exit_car.S
+++ b/src/soc/intel/apollolake/exit_car.S
@@ -27,14 +27,34 @@ chipset_teardown_car:
 	 */
 	pop	%ebx
 
-	/* invalidate cache contents. */
-	invd
 	/* Disable MTRRs. */
 	mov	$(MTRR_DEF_TYPE_MSR), %ecx
 	rdmsr
 	and	$(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax
 	wrmsr
 
+#if IS_ENABLED(CONFIG_CAR_CQOS)
+	/* Go back to all-evicting mode, set both masks to all-1s */
+	mov	$MSR_L2_QOS_MASK(0), %ecx
+	rdmsr
+	mov	$~0, %al
+	wrmsr
+
+	mov	$MSR_L2_QOS_MASK(1), %ecx
+	rdmsr
+	mov	$~0, %al
+	wrmsr
+
+	/* Reset CLOS selector to 0 */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~IA32_PQR_ASSOC_MASK, %edx
+	wrmsr
+#endif
+	/* invalidate cache contents. */
+	invd
+
+#if IS_ENABLED(CONFIG_CAR_NEM)
 	/* Knock down bit 1 then bit 0 of NEM control not combining steps. */
 	mov	$(MSR_EVICT_CTL), %ecx
 	rdmsr
@@ -42,6 +62,7 @@ chipset_teardown_car:
 	wrmsr
 	and	$(~(1 << 0)), %eax
 	wrmsr
+#endif
 
 	/* Return to caller. */
 	jmp	*%ebx
diff --git a/src/soc/intel/apollolake/include/soc/cpu.h b/src/soc/intel/apollolake/include/soc/cpu.h
index 78fc0b0..300a4a2 100644
--- a/src/soc/intel/apollolake/include/soc/cpu.h
+++ b/src/soc/intel/apollolake/include/soc/cpu.h
@@ -23,6 +23,8 @@
 #include <device/device.h>
 
 void apollolake_init_cpus(struct device *dev);
+/* select which MSR_L2_QOS_MASK controls cache fill policy */
+void cache_qos_select(uint8_t index);
 #endif
 
 #define CPUID_APOLLOLAKE_A0	0x506c8
@@ -34,6 +36,26 @@ void apollolake_init_cpus(struct device *dev);
 #define MSR_EVICT_CTL		0x2e0
 #define MSR_EMULATE_PM_TMR	0x121
 #define   EMULATE_PM_TMR_EN	(1 << 16)
+#define MSR_PREFETCH_CTL	0x1a4
+#define   PREFETCH_L1_DISABLE	(1 << 0)
+#define   PREFETCH_L2_DISABLE	(1 << 2)
+
+
+#define MSR_L2_QOS_MASK(reg)		(0xd10 + reg)
+#define MSR_IA32_PQR_ASSOC		0xc8f
+/* MSR bits 33:32 encode slot number 0-3 */
+#define   IA32_PQR_ASSOC_MASK		(1 << 0 | 1 << 1)
+/* 16 way cache, 8 bits per QOS, 64 byte cache line, 1024 sets */
+#define CACHE_WAYS			16
+#define CACHE_BITS_PER_MASK		8
+#define CACHE_LINE_SIZE			64
+#define CACHE_SETS			1024
+/*
+ * Each bit in QOS mask controls this many bytes. This is calculated as:
+ * (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS
+ */
+#define CACHE_QOS_SIZE_PER_BIT		(128 * KiB)
+#define L2_CACHE_SIZE			0x100000
 
 #define BASE_CLOCK_MHZ		100