[coreboot-gerrit] Patch set updated for coreboot: northbridge/amd/amdmct/mct_ddr3: Attempt to recover from phy training errors

Timothy Pearson (tpearson@raptorengineeringinc.com) gerrit at coreboot.org
Sun Nov 15 01:19:16 CET 2015


Timothy Pearson (tpearson at raptorengineeringinc.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/12006

-gerrit

commit 60622f9d8f96561e977e5077d723307824ab8a27
Author: Timothy Pearson <tpearson at raptorengineeringinc.com>
Date:   Thu Jun 25 18:08:53 2015 -0500

    northbridge/amd/amdmct/mct_ddr3: Attempt to recover from phy training errors
    
    AMD's automatic phy phase detection hardware is very fragile and often
    produces incorrect results.  Attempt to recover from obvious phase
    locking errors by retrying phy training on the failing link.
    
    Change-Id: Ia2c3022534c9ad44714eef6e118869f054bd9f6b
    Signed-off-by: Timothy Pearson <tpearson at raptorengineeringinc.com>
---
 src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c  | 68 +++++++++++++++++++++------
 src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 36 +++++++++++---
 2 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
index 5107fee..b3572b1 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
@@ -14,11 +14,11 @@
  * GNU General Public License for more details.
  */
 
-static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
-static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
-static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
 static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
@@ -96,11 +96,12 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat,
 }
 
 
-static void PhyWLPass1(struct MCTStatStruc *pMCTstat,
+static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct)
 {
 	u8 dimm;
 	u16 DIMMValid;
+	uint8_t status = 0;
 	void *DCTPtr;
 
 	dct &= 1;
@@ -117,19 +118,22 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat,
 		PrepareC_DCT(pMCTstat, pDCTstat, dct);
 		for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
 			if (DIMMValid & (1 << (dimm << 1))) {
-				AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass);
-				AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass);
-				AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass);
 			}
 		}
 	}
+
+	return status;
 }
 
-static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
+static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct)
 {
 	u8 dimm;
 	u16 DIMMValid;
+	uint8_t status = 0;
 	void *DCTPtr;
 
 	dct &= 1;
@@ -159,12 +163,14 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
 		DisableAutoRefresh_D(pMCTstat, pDCTstat);
 		for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
 			if (DIMMValid & (1 << (dimm << 1))) {
-				AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass);
-				AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass);
-				AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass);
 			}
 		}
 	}
+
+	return status;
 }
 
 static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
@@ -179,6 +185,8 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
 static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, uint8_t Pass)
 {
+	uint8_t status;
+	uint8_t timeout;
 	uint16_t final_target_freq;
 
 	pDCTstat->C_MCTPtr  = &(pDCTstat->s_C_MCTPtr);
@@ -197,8 +205,21 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 	}
 
 	if (Pass == FirstPass) {
-		PhyWLPass1(pMCTstat, pDCTstat, 0);
-		PhyWLPass1(pMCTstat, pDCTstat, 1);
+		timeout = 0;
+		do {
+			status = 0;
+			timeout++;
+			status |= PhyWLPass1(pMCTstat, pDCTstat, 0);
+			status |= PhyWLPass1(pMCTstat, pDCTstat, 1);
+			if (status)
+				printk(BIOS_INFO,
+					"%s: Retrying write levelling due to invalid value(s) detected in first phase\n",
+					__func__);
+		} while (status && (timeout < 8));
+		if (status)
+			printk(BIOS_INFO,
+				"%s: Uncorrectable invalid value(s) detected in first phase of write levelling\n",
+				__func__);
 	}
 
 	if (Pass == SecondPass) {
@@ -207,6 +228,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 			 * NOTE: BIOS must program both DCTs to the same frequency.
 			 * NOTE: Fam15h steps the frequency, Fam10h slams the frequency.
 			 */
+			uint8_t global_phy_training_status = 0;
 			final_target_freq = pDCTstat->TargetFreq;
 
 			while (pDCTstat->Speed != final_target_freq) {
@@ -215,12 +237,28 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 				else
 					pDCTstat->TargetFreq = final_target_freq;
 				SetTargetFreq(pMCTstat, pDCTstat);
-				PhyWLPass2(pMCTstat, pDCTstat, 0);
-				PhyWLPass2(pMCTstat, pDCTstat, 1);
+				timeout = 0;
+				do {
+					status = 0;
+					timeout++;
+					status |= PhyWLPass2(pMCTstat, pDCTstat, 0);
+					status |= PhyWLPass2(pMCTstat, pDCTstat, 1);
+					if (status)
+						printk(BIOS_INFO,
+							"%s: Retrying write levelling due to invalid value(s) detected in last phase\n",
+							__func__);
+				} while (status && (timeout < 8));
+				global_phy_training_status |= status;
 			}
 
 			pDCTstat->TargetFreq = final_target_freq;
 
+			if (global_phy_training_status)
+				printk(BIOS_WARNING,
+					"%s: Uncorrectable invalid value(s) detected in second phase of write levelling; "
+					"continuing but system may be unstable!\n",
+					__func__);
+
 			uint8_t dct;
 			for (dct = 0; dct < 2; dct++) {
 				sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
index 48b72ca..496803e 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
@@ -50,7 +50,7 @@ static int32_t abs(int32_t val) {
  */
 
 /*-----------------------------------------------------------------------------
- * void AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData,
+ * uint8_t AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData,
  *                  u8 Dimm, u8 Pass)
  *
  *  Description:
@@ -67,7 +67,7 @@ static int32_t abs(int32_t val) {
  *       OUT
  *-----------------------------------------------------------------------------
  */
-void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
@@ -170,12 +170,15 @@ void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 	}
 
 	pDCTData->WLCriticalGrossDelayPrevPass = 0x1f;
+
+	return 0;
 }
 
-void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
+	uint8_t status = 0;
 	sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
 
 	if (is_fam15h()) {
@@ -202,19 +205,38 @@ void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 
 		/* Compensate for occasional noise/instability causing sporadic training failure */
 		for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
+			uint8_t faulty_value_detected = 0;
 			uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f);
 			uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f);
-			if (abs(total_delay_phy - total_delay_seed) > 0x20) {
-				printk(BIOS_DEBUG, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__,
+			if (pass == FirstPass) {
+				/* Allow a somewhat higher step threshold on the first pass.
+				 * For the most part, as long as the phy isn't stepping
+				 * several clocks at once, the values are probably valid.
+				 */
+				if (abs(total_delay_phy - total_delay_seed) > 0x30)
+					faulty_value_detected = 1;
+			} else {
+				/* Stepping memory clocks between adjacent allowed frequencies
+				 * should not yield large phy value differences...
+				 */
+
+				if (abs(total_delay_phy - total_delay_seed) > 0x20)
+					faulty_value_detected = 1;
+			}
+			if (faulty_value_detected) {
+				printk(BIOS_INFO, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__,
 					total_delay_seed, total_delay_phy, abs(total_delay_phy - total_delay_seed));
 				pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane];
 				pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane];
+				status = 1;
 			}
 		}
 	}
+
+	return status;
 }
 
-void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
@@ -281,6 +303,8 @@ void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 	 * to the normal operating termination:
 	 */
 	prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE);
+
+	return 0;
 }
 
 /*----------------------------------------------------------------------------



More information about the coreboot-gerrit mailing list