Michał Żygowski has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/62088 )
Change subject: cpu/amd/family_10h-family_15h/cache_as_ram.S: Rewrite the CAR setup ......................................................................
cpu/amd/family_10h-family_15h/cache_as_ram.S: Rewrite the CAR setup
Simplify the CAR MTRR setup code. Rearrange the AP stack calculation.
Signed-off-by: Michał Żygowski michal.zygowski@3mdeb.com Change-Id: I1efdc35bbc4437f54461dba54eb38afe85517817 --- M src/cpu/amd/family_10h-family_15h/Kconfig M src/cpu/amd/family_10h-family_15h/cache_as_ram.S M src/cpu/amd/socket_G34/Kconfig 3 files changed, 89 insertions(+), 184 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/88/62088/1
diff --git a/src/cpu/amd/family_10h-family_15h/Kconfig b/src/cpu/amd/family_10h-family_15h/Kconfig index ee1b6ee..ccc4921 100644 --- a/src/cpu/amd/family_10h-family_15h/Kconfig +++ b/src/cpu/amd/family_10h-family_15h/Kconfig @@ -15,13 +15,6 @@
if CPU_AMD_MODEL_10XXX
-config USE_LARGE_DCACHE - bool - default y if CPU_AMD_SOCKET_G34_NON_AGESA - default y if CPU_AMD_SOCKET_FM2_NON_AGESA - default y if CPU_AMD_SOCKET_C32_NON_AGESA - default n - config NUM_IPI_STARTS int default 1 @@ -30,27 +23,6 @@ int default 48
-config DCACHE_RAM_BASE - hex - default 0xc4000 - -config DCACHE_RAM_SIZE - hex - default 0x0c000 - -config DCACHE_BSP_STACK_SIZE - hex - default 0x4000 - -config DCACHE_BSP_TOP_STACK_SLUSH - hex - default 0x4000 if USE_LARGE_DCACHE - default 0x1000 - -config DCACHE_AP_STACK_SIZE - hex - default 0x500 - config SET_FIDVID bool default y diff --git a/src/cpu/amd/family_10h-family_15h/cache_as_ram.S b/src/cpu/amd/family_10h-family_15h/cache_as_ram.S index 70f1b01..7fb19c2 100644 --- a/src/cpu/amd/family_10h-family_15h/cache_as_ram.S +++ b/src/cpu/amd/family_10h-family_15h/cache_as_ram.S @@ -68,7 +68,7 @@ movl %eax, %ebx /* We store the status. */ cvtsi2sd %ebx, %xmm5
- jmp_if_k8(CAR_FAM10_out_post_errata) + jmp_if_k8(CAR_FAM10_errata_applied)
/* * For GH, CAR need to set DRAM Base/Limit registers to direct that @@ -145,9 +145,8 @@
/* Determine if this is the second core to start in a compute unit; if so, wait for first core start, clear init detect and skip MTRR init */ bt $24, %eax - jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */ + jnc skip_cu_check /* First core in the compute unit jumps to skip_cu_check */
- /* Determine if this is the second core to start in a compute unit; if so, clear init detect and skip MTRR init */ /* Busywait until the first core sets up the MTRRs */ check_init_detect_1: /* Check if cpu_init_detected. */ @@ -204,106 +203,31 @@
jmp clear_fixed_var_mtrr clear_fixed_var_mtrr_out: - -/* - * 0x06 is the WB IO type for a given 4k segment. - * 0x1e is the MEM IO type for a given 4k segment (K10 and above). - * segs is the number of 4k segments in the area of the particular - * register we want to use for CAR. - * reg is the register where the IO type should be stored. - */ -.macro extractmask segs, reg -.if \segs <= 0 - /* - * The xorl here is superfluous because at the point of first execution - * of this macro, %eax and %edx are cleared. Later invocations of this - * macro will have a monotonically increasing segs parameter. - */ - xorl \reg, \reg -.else - jmp_if_k8(1f) - -.if \segs == 1 - movl $0x1e000000, \reg /* WB MEM type */ -.elseif \segs == 2 - movl $0x1e1e0000, \reg /* WB MEM type */ -.elseif \segs == 3 - movl $0x1e1e1e00, \reg /* WB MEM type */ -.elseif \segs >= 4 - movl $0x1e1e1e1e, \reg /* WB MEM type */ -.endif - jmp 2f -1: -.if \segs == 1 - movl $0x06000000, \reg /* WB IO type */ -.elseif \segs == 2 - movl $0x06060000, \reg /* WB IO type */ -.elseif \segs == 3 - movl $0x06060600, \reg /* WB IO type */ -.elseif \segs >= 4 - movl $0x06060606, \reg /* WB IO type */ -.endif -2: -.endif /* if \segs <= 0 */ -.endm - -/* - * carsize is the cache size in bytes we want to use for CAR. - * windowoffset is the 32k-aligned window into CAR size. - */ -.macro simplemask carsize, windowoffset - .set gas_bug_workaround,(((\carsize - \windowoffset) >> 12) - 4) - extractmask gas_bug_workaround, %eax - .set gas_bug_workaround,(((\carsize - \windowoffset) >> 12)) - extractmask gas_bug_workaround, %edx - /* - * Without the gas bug workaround, the entire macro would consist - * only of the two lines below: - * extractmask (((\carsize - \windowoffset) >> 12) - 4), %eax - * extractmask (((\carsize - \windowoffset) >> 12)), %edx - */ -.endm - -#if CONFIG(CPU_AMD_MODEL_10XXX) - #if CacheSize > 0x80000 - #error Invalid CAR size, must be at most 128k (processor limit is 512k). 
- #endif -#else - #if CacheSize > 0x10000 - #error Invalid CAR size, must be at most 64k. - #endif -#endif -#if CacheSize < 0x1000 -#error Invalid CAR size, must be at least 4k. This is a processor limitation. -#endif -#if (CacheSize & (0x1000 - 1)) -#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation. -#endif - -#if CacheSize > 0x8000 - /* Enable caching for 32K-64K using fixed MTRR. */ - movl $MTRR_FIX_4K_C0000, %ecx - simplemask CacheSize, 0x8000 + /* CAR region base 0x30000 */ + movl $MTRR_FIX_64K_00000, %ecx + movl $0x1e000000, %eax + movl $0x1e, %edx wrmsr -#endif
-#if CacheSize > 0x10000 - /* Enable caching for 64K-96K using fixed MTRR. */ - movl $MTRR_FIX_4K_D0000, %ecx - simplemask CacheSize, 0x10000 + /* Enable ROM caching */ + movl $MTRR_PHYS_BASE(0), %ecx + xorl %edx, %edx + movl $_rom_mtrr_base, %eax + orl $MTRR_TYPE_WRPROT, %eax wrmsr -#endif
-#if CacheSize > 0x18000 - /* Enable caching for 96K-128K using fixed MTRR. */ - movl $MTRR_FIX_4K_D8000, %ecx - simplemask CacheSize, 0x18000 - wrmsr -#endif + /* Determine CPU_ADDR_BITS and load PHYSMASK high word to %edx. */ + movl $0x80000008, %eax + cpuid + movb %al, %cl + sub $32, %cl + movl $1, %edx + shl %cl, %edx + subl $1, %edx
- /* Enable caching for 0-32K using fixed MTRR. */ - movl $MTRR_FIX_4K_C8000, %ecx - simplemask CacheSize, 0 + movl $MTRR_PHYS_MASK(0), %ecx + movl $_rom_mtrr_mask, %eax + orl $MTRR_PHYS_MASK_VALID, %eax wrmsr
jmp_if_fam15h(fam15_skip_dram_mtrr_setup) @@ -315,32 +239,6 @@ wrmsr
fam15_skip_dram_mtrr_setup: - -#if CONFIG_XIP_ROM_SIZE - - /* Enable write base caching so we can do execute in place (XIP) - * on the flash ROM. - */ - movl $MTRR_PHYS_BASE(1), %ecx - xorl %edx, %edx - /* - * IMPORTANT: The following calculation _must_ be done at runtime. See - * https://mail.coreboot.org/pipermail/coreboot/2010-October/060922.html - */ - movl $_program, %eax - andl $(~(CONFIG_XIP_ROM_SIZE - 1)), %eax - orl $MTRR_TYPE_WRBACK, %eax - wrmsr - - movl $MTRR_PHYS_MASK(1), %ecx - movl $0xff, %edx /* (1 << (CONFIG_CPU_ADDR_BITS - 32)) - 1 for K8 (CONFIG_CPU_ADDR_BITS = 40) */ - jmp_if_k8(wbcache_post_fam10_setup) - movl $0xffff, %edx /* (1 << (CONFIG_CPU_ADDR_BITS - 32)) - 1 for FAM10 (CONFIG_CPU_ADDR_BITS = 48) */ -wbcache_post_fam10_setup: - movl $(~(CONFIG_XIP_ROM_SIZE - 1) | MTRR_PHYS_MASK_VALID), %eax - wrmsr -#endif /* CONFIG_XIP_ROM_SIZE */ - /* Set the default memory type and enable fixed and variable MTRRs. */ movl $MTRR_DEF_TYPE_MSR, %ecx xorl %edx, %edx @@ -420,13 +318,13 @@
/* Read the range with lodsl. */ cld - movl $CacheBase, %esi - movl $(CacheSize >> 2), %ecx + movl $CONFIG_DCACHE_RAM_BASE, %esi + movl $(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx rep lodsl
/* Clear the range. */ - movl $CacheBase, %edi - movl $(CacheSize >> 2), %ecx + movl $CONFIG_DCACHE_RAM_BASE, %edi + movl $(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx xorl %eax, %eax rep stosl
@@ -444,12 +342,7 @@ CAR_skip_k8_errata_part2:
/* Set up the stack pointer. */ - movl $(CacheBase + CacheSize), %eax - movl %eax, %esp - - /* Poison the lower stack boundary */ - movl $((CacheBase + CacheSize) - CacheSizeBSPStack), %eax - movl $0xdeadbeef, (%eax) + movl $_ecar_stack, %esp
post_code(0xa3)
@@ -458,15 +351,18 @@ /* * Need to set stack pointer for AP. * It will be from: - * CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush)) - * - (NodeID << CoreIDbits + CoreID) * CacheSizeAPStack - * The spacing between the BSP stack and the top of the AP - * stacks is purposefully set larger (an extra CacheSizeBSPSlush + * _ecar_stack - (0x4000 + CONFIG_DCACHE_BSP_TOP_STACK_SLUSH) + * - (NodeID << CoreIDbits + CoreID) * CONFIG_DCACHE_AP_STACK_SIZE + * 0x4000 is the default BSP stack size, because CONFIG_DCACHE_BSP_STACK_SIZE + * indicates whole stack region in car.ld + * + * The spacing between the BSP stack and the top of the AP stacks is + * purposefully set larger (an extra CONFIG_DCACHE_BSP_TOP_STACK_SLUSH * worth of unused space) than necessary to aid debugging when * additional stack variables are added by future developers. - * The extra space will allow BSP overruns to be caught by - * the warning logic and easily fixed instead of crashing the - * system with no obvious clues of what went wrong. + * The extra space will allow BSP overruns to be caught by the warning + * logic and easily fixed instead of crashing the system with no + * obvious clues of what went wrong. * * So, need to get the NodeID and CoreID at first. * If NB_CFG_MSR bit 54 is set just use initial APIC ID, otherwise need @@ -511,10 +407,15 @@
ap_apicid_ready:
+ +ap_apicid_ready: + + /* Save the APIC ID in ecx for later */ + movl %ebx, %ecx /* Calculate stack pointer using adjusted APIC ID stored in ebx */ - movl $CacheSizeAPStack, %eax + movl $CONFIG_DCACHE_AP_STACK_SIZE, %eax mull %ebx - movl $(CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))), %esp + movl $(_ecar_stack - (0x4000 + CONFIG_DCACHE_BSP_TOP_STACK_SLUSH)), %esp subl %eax, %esp
/* Restore init detect */ @@ -522,24 +423,40 @@
post_code(0xa4)
+ test %ebx, %ebx + jz CAR_FAM10_ap_out + + /* Save the core INIT detect bit in NB scratch register (read-modify-write through I/O ports 0xcf8/0xcfc) */ + movl $1, %ebx + roll %cl, %ebx /* one bit for each core, max 32 cores; a variable rotate count must be given in %cl */ + + movl $0x80000080, %eax + movw $0xcf8, %dx + outl %eax, %dx + movw $0xcfc, %dx + inl %dx, %eax + orl %ebx, %eax + outl %eax, %dx + CAR_FAM10_ap_out:
post_code(0xa5)
- /* Disable SSE. */ - movl %cr4, %eax - andl $~(3 << 9), %eax - movl %eax, %cr4 + /* Need to align stack to 16 bytes at call instruction. + * Account for the pushes below. + */ + andl $0xfffffff0, %esp + subl $4, %esp
post_code(0xa6)
- /* Restore the BIST result. */ - movl %ebp, %eax - - /* We need to set EBP? No need. */ - movl %esp, %ebp - pushl %ebx /* Init detected. */ - pushl %eax /* BIST */ + /* push TSC and BIST to stack */ + movd %mm0, %eax + pushl %eax /* BIST */ + movd %mm2, %eax + pushl %eax /* tsc[63:32] */ + movd %mm1, %eax + pushl %eax /* tsc[31:0] */
post_code(0xa7)
diff --git a/src/cpu/amd/socket_G34/Kconfig b/src/cpu/amd/socket_G34/Kconfig index abc9726..52fb1317 100644 --- a/src/cpu/amd/socket_G34/Kconfig +++ b/src/cpu/amd/socket_G34/Kconfig @@ -22,8 +22,24 @@ hex default 0x18
-config XIP_ROM_SIZE +config DCACHE_RAM_BASE hex - default 0x80000 + default 0x30000 + +config DCACHE_RAM_SIZE + hex + default 0x30000 + +config DCACHE_BSP_STACK_SIZE + hex + default 0x10000 + +config DCACHE_BSP_TOP_STACK_SLUSH + hex + default 0x2000 + +config DCACHE_AP_STACK_SIZE + hex + default 0x500
endif