Michał Żygowski has uploaded this change for review.

View Change

cpu/amd/family_10h-family_15h/cache_as_ram.S: Rewrite the CAR setup

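Simplify the CAR MTRR setup: program the CAR region with a single
fixed MTRR write instead of the extractmask/simplemask macros, and
derive the ROM caching mask width from CPUID. Rearrange the AP stack
calculation and move the DCACHE_* options to the socket Kconfig.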

Signed-off-by: Michał Żygowski <michal.zygowski@3mdeb.com>
Change-Id: I1efdc35bbc4437f54461dba54eb38afe85517817
---
M src/cpu/amd/family_10h-family_15h/Kconfig
M src/cpu/amd/family_10h-family_15h/cache_as_ram.S
M src/cpu/amd/socket_G34/Kconfig
3 files changed, 89 insertions(+), 184 deletions(-)

git pull ssh://review.coreboot.org:29418/coreboot refs/changes/88/62088/1
diff --git a/src/cpu/amd/family_10h-family_15h/Kconfig b/src/cpu/amd/family_10h-family_15h/Kconfig
index ee1b6ee..ccc4921 100644
--- a/src/cpu/amd/family_10h-family_15h/Kconfig
+++ b/src/cpu/amd/family_10h-family_15h/Kconfig
@@ -15,13 +15,6 @@

if CPU_AMD_MODEL_10XXX

-config USE_LARGE_DCACHE
- bool
- default y if CPU_AMD_SOCKET_G34_NON_AGESA
- default y if CPU_AMD_SOCKET_FM2_NON_AGESA
- default y if CPU_AMD_SOCKET_C32_NON_AGESA
- default n
-
config NUM_IPI_STARTS
int
default 1
@@ -30,27 +23,6 @@
int
default 48

-config DCACHE_RAM_BASE
- hex
- default 0xc4000
-
-config DCACHE_RAM_SIZE
- hex
- default 0x0c000
-
-config DCACHE_BSP_STACK_SIZE
- hex
- default 0x4000
-
-config DCACHE_BSP_TOP_STACK_SLUSH
- hex
- default 0x4000 if USE_LARGE_DCACHE
- default 0x1000
-
-config DCACHE_AP_STACK_SIZE
- hex
- default 0x500
-
config SET_FIDVID
bool
default y
diff --git a/src/cpu/amd/family_10h-family_15h/cache_as_ram.S b/src/cpu/amd/family_10h-family_15h/cache_as_ram.S
index 70f1b01..7fb19c2 100644
--- a/src/cpu/amd/family_10h-family_15h/cache_as_ram.S
+++ b/src/cpu/amd/family_10h-family_15h/cache_as_ram.S
@@ -68,7 +68,7 @@
movl %eax, %ebx /* We store the status. */
cvtsi2sd %ebx, %xmm5

- jmp_if_k8(CAR_FAM10_out_post_errata)
+ jmp_if_k8(CAR_FAM10_errata_applied)

/*
* For GH, CAR need to set DRAM Base/Limit registers to direct that
@@ -145,9 +145,8 @@

/* Determine if this is the second core to start in a compute unit; if so, wait for first core start, clear init detect and skip MTRR init */
bt $24, %eax
- jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */
+ jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */

- /* Determine if this is the second core to start in a compute unit; if so, clear init detect and skip MTRR init */
/* Busywait until the first core sets up the MTRRs */
check_init_detect_1:
/* Check if cpu_init_detected. */
@@ -204,106 +203,31 @@

jmp clear_fixed_var_mtrr
clear_fixed_var_mtrr_out:
-
-/*
- * 0x06 is the WB IO type for a given 4k segment.
- * 0x1e is the MEM IO type for a given 4k segment (K10 and above).
- * segs is the number of 4k segments in the area of the particular
- * register we want to use for CAR.
- * reg is the register where the IO type should be stored.
- */
-.macro extractmask segs, reg
-.if \segs <= 0
- /*
- * The xorl here is superfluous because at the point of first execution
- * of this macro, %eax and %edx are cleared. Later invocations of this
- * macro will have a monotonically increasing segs parameter.
- */
- xorl \reg, \reg
-.else
- jmp_if_k8(1f)
-
-.if \segs == 1
- movl $0x1e000000, \reg /* WB MEM type */
-.elseif \segs == 2
- movl $0x1e1e0000, \reg /* WB MEM type */
-.elseif \segs == 3
- movl $0x1e1e1e00, \reg /* WB MEM type */
-.elseif \segs >= 4
- movl $0x1e1e1e1e, \reg /* WB MEM type */
-.endif
- jmp 2f
-1:
-.if \segs == 1
- movl $0x06000000, \reg /* WB IO type */
-.elseif \segs == 2
- movl $0x06060000, \reg /* WB IO type */
-.elseif \segs == 3
- movl $0x06060600, \reg /* WB IO type */
-.elseif \segs >= 4
- movl $0x06060606, \reg /* WB IO type */
-.endif
-2:
-.endif /* if \segs <= 0 */
-.endm
-
-/*
- * carsize is the cache size in bytes we want to use for CAR.
- * windowoffset is the 32k-aligned window into CAR size.
- */
-.macro simplemask carsize, windowoffset
- .set gas_bug_workaround,(((\carsize - \windowoffset) >> 12) - 4)
- extractmask gas_bug_workaround, %eax
- .set gas_bug_workaround,(((\carsize - \windowoffset) >> 12))
- extractmask gas_bug_workaround, %edx
- /*
- * Without the gas bug workaround, the entire macro would consist
- * only of the two lines below:
- * extractmask (((\carsize - \windowoffset) >> 12) - 4), %eax
- * extractmask (((\carsize - \windowoffset) >> 12)), %edx
- */
-.endm
-
-#if CONFIG(CPU_AMD_MODEL_10XXX)
- #if CacheSize > 0x80000
- #error Invalid CAR size, must be at most 128k (processor limit is 512k).
- #endif
-#else
- #if CacheSize > 0x10000
- #error Invalid CAR size, must be at most 64k.
- #endif
-#endif
-#if CacheSize < 0x1000
-#error Invalid CAR size, must be at least 4k. This is a processor limitation.
-#endif
-#if (CacheSize & (0x1000 - 1))
-#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation.
-#endif
-
-#if CacheSize > 0x8000
- /* Enable caching for 32K-64K using fixed MTRR. */
- movl $MTRR_FIX_4K_C0000, %ecx
- simplemask CacheSize, 0x8000
+ /* CAR region 0x30000-0x5ffff: three 64K segments, WB MEM type (0x1e) each */
+ movl $MTRR_FIX_64K_00000, %ecx
+ movl $0x1e000000, %eax /* byte 3: 0x30000-0x3ffff */
+ movl $0x1e1e, %edx /* bytes 4-5: 0x40000-0x5ffff, matches DCACHE_RAM_SIZE 0x30000 */
wrmsr
-#endif

-#if CacheSize > 0x10000
- /* Enable caching for 64K-96K using fixed MTRR. */
- movl $MTRR_FIX_4K_D0000, %ecx
- simplemask CacheSize, 0x10000
+ /* Enable ROM caching */
+ movl $MTRR_PHYS_BASE(0), %ecx
+ xorl %edx, %edx
+ movl $_rom_mtrr_base, %eax
+ orl $MTRR_TYPE_WRPROT, %eax
wrmsr
-#endif

-#if CacheSize > 0x18000
- /* Enable caching for 96K-128K using fixed MTRR. */
- movl $MTRR_FIX_4K_D8000, %ecx
- simplemask CacheSize, 0x18000
- wrmsr
-#endif
+ /* Determine CPU_ADDR_BITS and load PHYSMASK high word to %edx. */
+ movl $0x80000008, %eax
+ cpuid
+ movb %al, %cl
+ sub $32, %cl
+ movl $1, %edx
+ shl %cl, %edx
+ subl $1, %edx

- /* Enable caching for 0-32K using fixed MTRR. */
- movl $MTRR_FIX_4K_C8000, %ecx
- simplemask CacheSize, 0
+ movl $MTRR_PHYS_MASK(0), %ecx
+ movl $_rom_mtrr_mask, %eax
+ orl $MTRR_PHYS_MASK_VALID, %eax
wrmsr

jmp_if_fam15h(fam15_skip_dram_mtrr_setup)
@@ -315,32 +239,6 @@
wrmsr

fam15_skip_dram_mtrr_setup:
-
-#if CONFIG_XIP_ROM_SIZE
-
- /* Enable write base caching so we can do execute in place (XIP)
- * on the flash ROM.
- */
- movl $MTRR_PHYS_BASE(1), %ecx
- xorl %edx, %edx
- /*
- * IMPORTANT: The following calculation _must_ be done at runtime. See
- * https://mail.coreboot.org/pipermail/coreboot/2010-October/060922.html
- */
- movl $_program, %eax
- andl $(~(CONFIG_XIP_ROM_SIZE - 1)), %eax
- orl $MTRR_TYPE_WRBACK, %eax
- wrmsr
-
- movl $MTRR_PHYS_MASK(1), %ecx
- movl $0xff, %edx /* (1 << (CONFIG_CPU_ADDR_BITS - 32)) - 1 for K8 (CONFIG_CPU_ADDR_BITS = 40) */
- jmp_if_k8(wbcache_post_fam10_setup)
- movl $0xffff, %edx /* (1 << (CONFIG_CPU_ADDR_BITS - 32)) - 1 for FAM10 (CONFIG_CPU_ADDR_BITS = 48) */
-wbcache_post_fam10_setup:
- movl $(~(CONFIG_XIP_ROM_SIZE - 1) | MTRR_PHYS_MASK_VALID), %eax
- wrmsr
-#endif /* CONFIG_XIP_ROM_SIZE */
-
/* Set the default memory type and enable fixed and variable MTRRs. */
movl $MTRR_DEF_TYPE_MSR, %ecx
xorl %edx, %edx
@@ -420,13 +318,13 @@

/* Read the range with lodsl. */
cld
- movl $CacheBase, %esi
- movl $(CacheSize >> 2), %ecx
+ movl $CONFIG_DCACHE_RAM_BASE, %esi
+ movl $(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx
rep lodsl

/* Clear the range. */
- movl $CacheBase, %edi
- movl $(CacheSize >> 2), %ecx
+ movl $CONFIG_DCACHE_RAM_BASE, %edi
+ movl $(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx
xorl %eax, %eax
rep stosl

@@ -444,12 +342,7 @@
CAR_skip_k8_errata_part2:

/* Set up the stack pointer. */
- movl $(CacheBase + CacheSize), %eax
- movl %eax, %esp
-
- /* Poison the lower stack boundary */
- movl $((CacheBase + CacheSize) - CacheSizeBSPStack), %eax
- movl $0xdeadbeef, (%eax)
+ movl $_ecar_stack, %esp

post_code(0xa3)

@@ -458,15 +351,18 @@
/*
* Need to set stack pointer for AP.
* It will be from:
- * CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))
- * - (NodeID << CoreIDbits + CoreID) * CacheSizeAPStack
- * The spacing between the BSP stack and the top of the AP
- * stacks is purposefully set larger (an extra CacheSizeBSPSlush
+ * _ecar_stack - (0x4000 + CONFIG_DCACHE_BSP_TOP_STACK_SLUSH)
+ * - (NodeID << CoreIDbits + CoreID) * CONFIG_DCACHE_AP_STACK_SIZE
+ * 0x4000 is the default BSP stack size; it is hard-coded here because
+ * CONFIG_DCACHE_BSP_STACK_SIZE describes the whole stack region in car.ld.
+ *
+ * The spacing between the BSP stack and the top of the AP stacks is
+ * purposefully set larger (an extra CONFIG_DCACHE_BSP_TOP_STACK_SLUSH
* worth of unused space) than necessary to aid debugging when
* additional stack variables are added by future developers.
- * The extra space will allow BSP overruns to be caught by
- * the warning logic and easily fixed instead of crashing the
- * system with no obvious clues of what went wrong.
+ * The extra space will allow BSP overruns to be caught by the warning
+ * logic and easily fixed instead of crashing the system with no
+ * obvious clues of what went wrong.
*
* So, need to get the NodeID and CoreID at first.
* If NB_CFG_MSR bit 54 is set just use initial APIC ID, otherwise need
@@ -511,10 +407,12 @@

ap_apicid_ready:

+ /* Save the APIC ID in ecx for later */
+ movl %ebx, %ecx
/* Calculate stack pointer using adjusted APIC ID stored in ebx */
- movl $CacheSizeAPStack, %eax
+ movl $CONFIG_DCACHE_AP_STACK_SIZE, %eax
mull %ebx
- movl $(CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))), %esp
+ movl $(_ecar_stack - (0x4000 + CONFIG_DCACHE_BSP_TOP_STACK_SLUSH)), %esp
subl %eax, %esp

/* Restore init detect */
@@ -522,24 +420,40 @@

post_code(0xa4)

+ test %ebx, %ebx
+ jz CAR_FAM10_ap_out
+
+ /* Save the core INIT detect bit in NB scratch register */
+ movl $1, %ebx
+ roll %cl, %ebx /* count must be in %cl (APIC ID saved in ecx); one bit per core, max 32 cores */
+
+ movl $0x80000080, %eax /* PCI config address: enable bit, bus 0, dev 0, fn 0, reg 0x80 */
+ movw $0xcf8, %dx
+ outl %eax, %dx
+ movw $0xcfc, %dx
+ inl %dx, %eax
+ orl %ebx, %eax /* read-modify-write: keep bits already set */
+ outl %eax, %dx
+
CAR_FAM10_ap_out:

post_code(0xa5)

- /* Disable SSE. */
- movl %cr4, %eax
- andl $~(3 << 9), %eax
- movl %eax, %cr4
+ /* Need to align stack to 16 bytes at call instruction.
+ * Account for the pushes below.
+ */
+ andl $0xfffffff0, %esp
+ subl $4, %esp

post_code(0xa6)

- /* Restore the BIST result. */
- movl %ebp, %eax
-
- /* We need to set EBP? No need. */
- movl %esp, %ebp
- pushl %ebx /* Init detected. */
- pushl %eax /* BIST */
+ /* push TSC and BIST to stack */
+ movd %mm0, %eax
+ pushl %eax /* BIST */
+ movd %mm2, %eax
+ pushl %eax /* tsc[63:32] */
+ movd %mm1, %eax
+ pushl %eax /* tsc[31:0] */

post_code(0xa7)

diff --git a/src/cpu/amd/socket_G34/Kconfig b/src/cpu/amd/socket_G34/Kconfig
index abc9726..52fb1317 100644
--- a/src/cpu/amd/socket_G34/Kconfig
+++ b/src/cpu/amd/socket_G34/Kconfig
@@ -22,8 +22,24 @@
hex
default 0x18

-config XIP_ROM_SIZE
+config DCACHE_RAM_BASE
hex
- default 0x80000
+ default 0x30000
+
+config DCACHE_RAM_SIZE
+ hex
+ default 0x30000
+
+config DCACHE_BSP_STACK_SIZE
+ hex
+ default 0x10000
+
+config DCACHE_BSP_TOP_STACK_SLUSH
+ hex
+ default 0x2000
+
+config DCACHE_AP_STACK_SIZE
+ hex
+ default 0x500

endif

To view, visit change 62088. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-Change-Id: I1efdc35bbc4437f54461dba54eb38afe85517817
Gerrit-Change-Number: 62088
Gerrit-PatchSet: 1
Gerrit-Owner: Michał Żygowski <michal.zygowski@3mdeb.com>
Gerrit-MessageType: newchange