Resubmitting to include feedback from Peter... Thanks, Scott
MTRR related improvements for AMD family 10h and family 0Fh systems
-- When building for UMA, reduce the limit for DRAM below 4GB from E0000000 to C0000000. This is needed to accomodate the UMA frame buffer. -- Correct problem where msr C0010010 bits 21 and 22 (MtrrTom2En and Tom2ForceMemTypeWB) are not set consistently across cores. -- Enable TOM2 only if DRAM is present above 4GB. -- Use AMD Tom2ForceMemTypeWB feature to avoid the need for variable MTRR ranges above 4GB. -- Add above4gb flag argument to function x86_setup_var_mtrrs. Clearing this flag causes x86_setup_var_mtrrs() to omit MTRR ranges for DRAM above 4GB. AMD systems use this option to conserve MTRRs. -- Northbridge.c change to deduct UMA memory from DRAM size reported by ram_resource. This corrects a problem where mtrr.c generates an unexpected variable MTRR range. -- Correct problem causing build failure when CONFIG_GFXUMA=1 and CONFIG_VAR_MTRR_HOLE=0. -- Reserve the UMA DRAM range for AMD K8 as is already done for AMD family 10h. Tested with mahogany on ECS A780G-GM with 2GB and 4GB. Tested with mahogany_fam10 on ECS A780G-GM with 2GB and 4GB.
Signed-off-by: Scott Duplichan scott@notabs.org
Index: src/cpu/amd/mtrr/amd_mtrr.c =================================================================== --- src/cpu/amd/mtrr/amd_mtrr.c (revision 6064) +++ src/cpu/amd/mtrr/amd_mtrr.c (working copy) @@ -6,6 +6,10 @@ #include <cpu/x86/cache.h> #include <cpu/x86/msr.h>
+#if CONFIG_GFXUMA == 1 +extern uint64_t uma_memory_size; +#endif + static unsigned long resk(uint64_t value) { unsigned long resultk; @@ -107,14 +111,14 @@ unsigned long address_bits; struct mem_state state; unsigned long i; - msr_t msr; + msr_t msr, sys_cfg;
/* Enable the access to AMD RdDram and WrDram extension bits */ disable_cache(); - msr = rdmsr(SYSCFG_MSR); - msr.lo |= SYSCFG_MSR_MtrrFixDramModEn; - wrmsr(SYSCFG_MSR, msr); + sys_cfg = rdmsr(SYSCFG_MSR); + sys_cfg.lo |= SYSCFG_MSR_MtrrFixDramModEn; + wrmsr(SYSCFG_MSR, sys_cfg); enable_cache();
printk(BIOS_DEBUG, "\n"); @@ -146,13 +150,25 @@ /* Setup TOP_MEM */ msr.hi = state.mmio_basek >> 22; msr.lo = state.mmio_basek << 10; + + /* If UMA graphics is enabled, the frame buffer memory + * has been deducted from the size of memory below 4GB. + * When setting TOM, include UMA DRAM + */ + #if CONFIG_GFXUMA == 1 + msr.lo += uma_memory_size; + #endif wrmsr(TOP_MEM, msr);
+ sys_cfg.lo &= ~(SYSCFG_MSR_TOM2En | SYSCFG_MSR_TOM2WB); if(state.tomk > (4*1024*1024)) { - /* Setup TOP_MEM2 */ + /* DRAM above 4GB: set TOM2, SYSCFG_MSR_TOM2En + * and SYSCFG_MSR_TOM2WB + */ msr.hi = state.tomk >> 22; msr.lo = state.tomk << 10; wrmsr(TOP_MEM2, msr); + sys_cfg.lo |= SYSCFG_MSR_TOM2En | SYSCFG_MSR_TOM2WB; }
/* zero the IORR's before we enable to prevent @@ -167,10 +183,9 @@ * Enable the RdMem and WrMem bits in the fixed mtrrs. * Disable access to the RdMem and WrMem in the fixed mtrr. */ - msr = rdmsr(SYSCFG_MSR); - msr.lo |= SYSCFG_MSR_MtrrVarDramEn | SYSCFG_MSR_MtrrFixDramEn | SYSCFG_MSR_TOM2En; - msr.lo &= ~SYSCFG_MSR_MtrrFixDramModEn; - wrmsr(SYSCFG_MSR, msr); + sys_cfg.lo |= SYSCFG_MSR_MtrrVarDramEn | SYSCFG_MSR_MtrrFixDramEn; + sys_cfg.lo &= ~SYSCFG_MSR_MtrrFixDramModEn; + wrmsr(SYSCFG_MSR, sys_cfg);
enable_fixed_mtrr();
@@ -186,5 +201,5 @@ /* Now that I have mapped what is memory and what is not * Setup the mtrrs so we can cache the memory. */ - x86_setup_var_mtrrs(address_bits); + x86_setup_var_mtrrs(address_bits, 0); } Index: src/cpu/x86/mtrr/mtrr.c =================================================================== --- src/cpu/x86/mtrr/mtrr.c (revision 6064) +++ src/cpu/x86/mtrr/mtrr.c (working copy) @@ -230,7 +230,8 @@
static unsigned int range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, - unsigned long next_range_startk, unsigned char type, unsigned address_bits) + unsigned long next_range_startk, unsigned char type, + unsigned int address_bits, unsigned int above4gb) { if (!range_sizek) { /* If there's no MTRR hole, this function will bail out @@ -263,7 +264,12 @@ (type==MTRR_TYPE_UNCACHEABLE)?"UC": ((type==MTRR_TYPE_WRBACK)?"WB":"Other") ); - set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + + /* if range is above 4GB, MTRR is needed + * only if above4gb flag is set + */ + if (range_startk < 0x100000000ull / 1024 || above4gb) + set_var_mtrr(reg++, range_startk, sizek, type, address_bits); range_startk += sizek; range_sizek -= sizek; if (reg >= BIOS_MTRRS) { @@ -308,10 +314,9 @@ struct var_mtrr_state { unsigned long range_startk, range_sizek; unsigned int reg; -#if CONFIG_VAR_MTRR_HOLE unsigned long hole_startk, hole_sizek; -#endif - unsigned address_bits; + unsigned int address_bits; + unsigned int above4gb; /* Set if MTRRs are needed for DRAM above 4GB */ };
void set_var_mtrr_resource(void *gp, struct device *dev, struct resource *res) @@ -344,17 +349,17 @@ } #endif state->reg = range_to_mtrr(state->reg, state->range_startk, - state->range_sizek, basek, MTRR_TYPE_WRBACK, state->address_bits); + state->range_sizek, basek, MTRR_TYPE_WRBACK, + state->address_bits, state->above4gb); #if CONFIG_VAR_MTRR_HOLE state->reg = range_to_mtrr(state->reg, state->hole_startk, - state->hole_sizek, basek, MTRR_TYPE_UNCACHEABLE, state->address_bits); + state->hole_sizek, basek, MTRR_TYPE_UNCACHEABLE, + state->address_bits, state->above4gb); #endif state->range_startk = 0; state->range_sizek = 0; -#if CONFIG_VAR_MTRR_HOLE - state->hole_startk = 0; - state->hole_sizek = 0; -#endif + state->hole_startk = 0; + state->hole_sizek = 0; } /* Allocate an msr */ printk(BIOS_SPEW, " Allocate an msr - basek = %08lx, sizek = %08lx,\n", basek, sizek); @@ -388,12 +393,15 @@
}
-void x86_setup_var_mtrrs(unsigned address_bits) +void x86_setup_var_mtrrs(unsigned int address_bits, unsigned int above4gb) /* this routine needs to know how many address bits a given processor * supports. CPUs get grumpy when you set too many bits in * their mtrr registers :( I would generically call cpuid here * and find out how many physically supported but some cpus are * buggy, and report more bits then they actually support. + * If above4gb flag is set, variable MTRR ranges must be used to + * set cacheability of DRAM above 4GB. If above4gb flag is clear, + * some other mechanism is controlling cacheability of DRAM above 4GB. */ { /* Try this the simple way of incrementally adding together @@ -408,34 +416,38 @@ */ var_state.range_startk = 0; var_state.range_sizek = 0; -#if CONFIG_VAR_MTRR_HOLE var_state.hole_startk = 0; var_state.hole_sizek = 0; -#endif var_state.reg = 0; var_state.address_bits = address_bits; + var_state.above4gb = above4gb;
search_global_resources( IORESOURCE_MEM | IORESOURCE_CACHEABLE, IORESOURCE_MEM | IORESOURCE_CACHEABLE, set_var_mtrr_resource, &var_state); + #if (CONFIG_GFXUMA == 1) /* UMA or SP. */ - // For now we assume the UMA space is at the end of memory + /* For now we assume the UMA space is at the end of memory below 4GB */ if (var_state.hole_startk || var_state.hole_sizek) { printk(BIOS_DEBUG, "Warning: Can't set up MTRR hole for UMA due to pre-existing MTRR hole.\n"); } else { +#if CONFIG_VAR_MTRR_HOLE // Increase the base range and set up UMA as an UC hole instead var_state.range_sizek += (uma_memory_size >> 10);
var_state.hole_startk = (uma_memory_base >> 10); var_state.hole_sizek = (uma_memory_size >> 10); +#endif } #endif /* Write the last range */ var_state.reg = range_to_mtrr(var_state.reg, var_state.range_startk, - var_state.range_sizek, 0, MTRR_TYPE_WRBACK, var_state.address_bits); + var_state.range_sizek, 0, MTRR_TYPE_WRBACK, + var_state.address_bits, var_state.above4gb); #if CONFIG_VAR_MTRR_HOLE var_state.reg = range_to_mtrr(var_state.reg, var_state.hole_startk, - var_state.hole_sizek, 0, MTRR_TYPE_UNCACHEABLE, var_state.address_bits); + var_state.hole_sizek, 0, MTRR_TYPE_UNCACHEABLE, + var_state.address_bits, var_state.above4gb); #endif printk(BIOS_DEBUG, "DONE variable MTRRs\n"); printk(BIOS_DEBUG, "Clear out the extra MTRR's\n"); @@ -449,10 +461,11 @@ post_code(0x6A); }
+ void x86_setup_mtrrs(unsigned address_bits) { x86_setup_fixed_mtrrs(); - x86_setup_var_mtrrs(address_bits); + x86_setup_var_mtrrs(address_bits, 1); }
Index: src/include/cpu/amd/mtrr.h =================================================================== --- src/include/cpu/amd/mtrr.h (revision 6064) +++ src/include/cpu/amd/mtrr.h (working copy) @@ -8,6 +8,7 @@ #define MTRR_WRITE_MEM (1 << 3)
#define SYSCFG_MSR 0xC0010010 +#define SYSCFG_MSR_TOM2WB (1 << 22) #define SYSCFG_MSR_TOM2En (1 << 21) #define SYSCFG_MSR_MtrrVarDramEn (1 << 20) #define SYSCFG_MSR_MtrrFixDramModEn (1 << 19) Index: src/include/cpu/x86/mtrr.h =================================================================== --- src/include/cpu/x86/mtrr.h (revision 6064) +++ src/include/cpu/x86/mtrr.h (working copy) @@ -37,7 +37,7 @@ #if !defined (ASSEMBLY) && !defined(__PRE_RAM__) #include <device/device.h> void enable_fixed_mtrr(void); -void x86_setup_var_mtrrs(unsigned address_bits); +void x86_setup_var_mtrrs(unsigned int address_bits, unsigned int above4gb); void x86_setup_mtrrs(unsigned address_bits); int x86_mtrr_check(void); void set_var_mtrr_resource(void *gp, struct device *dev, struct resource *res); Index: src/northbridge/amd/amdfam10/northbridge.c =================================================================== --- src/northbridge/amd/amdfam10/northbridge.c (revision 6064) +++ src/northbridge/amd/amdfam10/northbridge.c (working copy) @@ -1060,6 +1060,12 @@ sizek -= (4*1024*1024 - mmio_basek); } } + +#if CONFIG_GFXUMA == 1 + /* Deduct uma memory before reporting because + * this is what the mtrr code expects */ + sizek -= uma_memory_size / 1024; +#endif ram_resource(dev, (idx | i), basek, sizek); idx += 0x10; #if CONFIG_WRITE_HIGH_TABLES==1 Index: src/northbridge/amd/amdk8/northbridge.c =================================================================== --- src/northbridge/amd/amdk8/northbridge.c (revision 6064) +++ src/northbridge/amd/amdk8/northbridge.c (working copy) @@ -822,12 +822,25 @@ #endif
#if CONFIG_WRITE_HIGH_TABLES==1 -#define HIGH_TABLES_SIZE 64 // maximum size of high tables in KB +#define HIGH_TABLES_SIZE 64 /* maximum size of high tables in KB */ extern uint64_t high_tables_base, high_tables_size; +#endif + #if CONFIG_GFXUMA == 1 extern uint64_t uma_memory_base, uma_memory_size; + +static void add_uma_resource(struct device *dev, int index) +{ + struct resource *resource; + + printk(BIOS_DEBUG, "Adding UMA memory area\n"); + resource = new_resource(dev, index); + resource->base = (resource_t) uma_memory_base; + resource->size = (resource_t) uma_memory_size; + resource->flags = IORESOURCE_MEM | IORESOURCE_RESERVE | + IORESOURCE_FIXED | IORESOURCE_STORED | IORESOURCE_ASSIGNED; +} #endif -#endif
static void amdk8_domain_set_resources(device_t dev) { @@ -1040,6 +1053,11 @@ /* If sizek == 0, it was split at mmio_basek without a hole. * Don't create an empty ram_resource. */ +#if CONFIG_GFXUMA == 1 + /* Deduct uma memory before reporting because + * this is what the mtrr code expects */ + sizek -= uma_memory_size / 1024; +#endif if (sizek) ram_resource(dev, (idx | i), basek, sizek); idx += 0x10; @@ -1057,6 +1075,10 @@ } #endif } + +#if CONFIG_GFXUMA == 1 + add_uma_resource(dev, 7); +#endif assign_resources(dev->link_list);
} Index: src/northbridge/amd/amdmct/wrappers/mcti.h =================================================================== --- src/northbridge/amd/amdmct/wrappers/mcti.h (revision 6064) +++ src/northbridge/amd/amdmct/wrappers/mcti.h (working copy) @@ -43,12 +43,6 @@ #endif
/*---------------------------------------------------------------------------- -COMMENT OUT ALL BUT 1 -----------------------------------------------------------------------------*/ -#define UMA_SUPPORT 0 /*Not supported */ -//#define UMA_SUPPORT 1 /*Supported */ - -/*---------------------------------------------------------------------------- UPDATE AS NEEDED ----------------------------------------------------------------------------*/ #ifndef MAX_NODES_SUPPORTED Index: src/northbridge/amd/amdmct/wrappers/mcti_d.c =================================================================== --- src/northbridge/amd/amdmct/wrappers/mcti_d.c (revision 6064) +++ src/northbridge/amd/amdmct/wrappers/mcti_d.c (working copy) @@ -64,16 +64,16 @@ //val = 2; /* S4 (Unbuffered SO-DIMMS) */ break; case NV_BYPMAX: -#if (UMA_SUPPORT == 0) +#if (CONFIG_GFXUMA == 0) val = 4; -#elif (UMA_SUPPORT == 1) +#elif (CONFIG_GFXUMA == 1) val = 7; #endif break; case NV_RDWRQBYP: -#if (UMA_SUPPORT == 0) +#if (CONFIG_GFXUMA == 0) val = 2; -#elif (UMA_SUPPORT == 1) +#elif (CONFIG_GFXUMA == 1) val = 3; #endif break; @@ -113,9 +113,9 @@ //val = 1; /* Enable */ break; case NV_BurstLen32: -#if (UMA_SUPPORT == 0) +#if (CONFIG_GFXUMA == 0) val = 0; /* 64 byte mode */ -#elif (UMA_SUPPORT == 1) +#elif (CONFIG_GFXUMA == 1) val = 1; /* 32 byte mode */ #endif break; @@ -132,13 +132,17 @@ //val = 1; /* enable */ break; case NV_BottomIO: +#if (CONFIG_GFXUMA == 0) val = 0xE0; /* address bits [31:24] */ +#elif (CONFIG_GFXUMA == 1) + val = 0xC0; /* address bits [31:24] */ +#endif break; case NV_BottomUMA: -#if (UMA_SUPPORT == 0) +#if (CONFIG_GFXUMA == 0) val = 0xE0; /* address bits [31:24] */ -#elif (UMA_SUPPORT == 1) - val = 0xB0; /* address bits [31:24] */ +#elif (CONFIG_GFXUMA == 1) + val = 0xC0; /* address bits [31:24] */ #endif break; case NV_ECC: