Timothy Pearson (tpearson@raptorengineeringinc.com) just uploaded a new patch set to Gerrit, which you can find at http://review.coreboot.org/12006
-gerrit
commit 2842d76725d9560f66e84b5a8243d50bfdfbab80 Author: Timothy Pearson tpearson@raptorengineeringinc.com Date: Thu Jun 25 18:08:53 2015 -0500
northbridge/amd/amdmct/mct_ddr3: Attempt to recover from phy training errors
AMD's automatic phy phase detection hardware is very fragile and often produces incorrect results. Attempt to recover from obvious phase-locking errors by retrying phy training on the failing link, making up to eight attempts before logging the failure and continuing.
Change-Id: Ia2c3022534c9ad44714eef6e118869f054bd9f6b Signed-off-by: Timothy Pearson tpearson@raptorengineeringinc.com --- src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 68 +++++++++++++++++++++------ src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 36 +++++++++++--- 2 files changed, 83 insertions(+), 21 deletions(-)
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c index 5107fee..b3572b1 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c @@ -14,11 +14,11 @@ * GNU General Public License for more details. */
-static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); -static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); -static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); @@ -96,11 +96,12 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat, }
-static void PhyWLPass1(struct MCTStatStruc *pMCTstat, +static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { u8 dimm; u16 DIMMValid; + uint8_t status = 0; void *DCTPtr;
dct &= 1; @@ -117,19 +118,22 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat, PrepareC_DCT(pMCTstat, pDCTstat, dct); for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { if (DIMMValid & (1 << (dimm << 1))) { - AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass); - AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass); - AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass); } } } + + return status; }
-static void PhyWLPass2(struct MCTStatStruc *pMCTstat, +static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { u8 dimm; u16 DIMMValid; + uint8_t status = 0; void *DCTPtr;
dct &= 1; @@ -159,12 +163,14 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, DisableAutoRefresh_D(pMCTstat, pDCTstat); for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { if (DIMMValid & (1 << (dimm << 1))) { - AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass); - AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass); - AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass); } } } + + return status; }
static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) @@ -179,6 +185,8 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t Pass) { + uint8_t status; + uint8_t timeout; uint16_t final_target_freq;
pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr); @@ -197,8 +205,21 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, }
if (Pass == FirstPass) { - PhyWLPass1(pMCTstat, pDCTstat, 0); - PhyWLPass1(pMCTstat, pDCTstat, 1); + timeout = 0; + do { + status = 0; + timeout++; + status |= PhyWLPass1(pMCTstat, pDCTstat, 0); + status |= PhyWLPass1(pMCTstat, pDCTstat, 1); + if (status) + printk(BIOS_INFO, + "%s: Retrying write levelling due to invalid value(s) detected in first phase\n", + __func__); + } while (status && (timeout < 8)); + if (status) + printk(BIOS_INFO, + "%s: Uncorrectable invalid value(s) detected in first phase of write levelling\n", + __func__); }
if (Pass == SecondPass) { @@ -207,6 +228,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, * NOTE: BIOS must program both DCTs to the same frequency. * NOTE: Fam15h steps the frequency, Fam10h slams the frequency. */ + uint8_t global_phy_training_status = 0; final_target_freq = pDCTstat->TargetFreq;
while (pDCTstat->Speed != final_target_freq) { @@ -215,12 +237,28 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, else pDCTstat->TargetFreq = final_target_freq; SetTargetFreq(pMCTstat, pDCTstat); - PhyWLPass2(pMCTstat, pDCTstat, 0); - PhyWLPass2(pMCTstat, pDCTstat, 1); + timeout = 0; + do { + status = 0; + timeout++; + status |= PhyWLPass2(pMCTstat, pDCTstat, 0); + status |= PhyWLPass2(pMCTstat, pDCTstat, 1); + if (status) + printk(BIOS_INFO, + "%s: Retrying write levelling due to invalid value(s) detected in last phase\n", + __func__); + } while (status && (timeout < 8)); + global_phy_training_status |= status; }
pDCTstat->TargetFreq = final_target_freq;
+ if (global_phy_training_status) + printk(BIOS_WARNING, + "%s: Uncorrectable invalid value(s) detected in second phase of write levelling; " + "continuing but system may be unstable!\n", + __func__); + uint8_t dct; for (dct = 0; dct < 2; dct++) { sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c index 48b72ca..496803e 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c @@ -50,7 +50,7 @@ static int32_t abs(int32_t val) { */
/*----------------------------------------------------------------------------- - * void AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData, + * uint8_t AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData, * u8 Dimm, u8 Pass) * * Description: @@ -67,7 +67,7 @@ static int32_t abs(int32_t val) { * OUT *----------------------------------------------------------------------------- */ -void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; @@ -170,12 +170,15 @@ void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta }
pDCTData->WLCriticalGrossDelayPrevPass = 0x1f; + + return 0; }
-void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; + uint8_t status = 0; sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
if (is_fam15h()) { @@ -202,19 +205,38 @@ void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
/* Compensate for occasional noise/instability causing sporadic training failure */ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { + uint8_t faulty_value_detected = 0; uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f); uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f); - if (abs(total_delay_phy - total_delay_seed) > 0x20) { - printk(BIOS_DEBUG, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__, + if (pass == FirstPass) { + /* Allow a somewhat higher step threshold on the first pass + * For the most part, as long as the phy isn't stepping + * several clocks at once the values are probably valid. + */ + if (abs(total_delay_phy - total_delay_seed) > 0x30) + faulty_value_detected = 1; + } else { + /* Stepping memory clocks between adjacent allowed frequencies + * should not yield large phy value differences... + */ + + if (abs(total_delay_phy - total_delay_seed) > 0x20) + faulty_value_detected = 1; + } + if (faulty_value_detected) { + printk(BIOS_INFO, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__, total_delay_seed, total_delay_phy, abs(total_delay_phy - total_delay_seed)); pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane]; pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane]; + status = 1; } } } + + return status; }
-void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; @@ -281,6 +303,8 @@ void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta * to the normal operating termination: */ prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE); + + return 0; }
/*----------------------------------------------------------------------------