[coreboot-gerrit] Patch set updated for coreboot: northbridge/amd/amdmct: Fix broken AMD K10 DDR3 memory initialization

Timothy Pearson (tpearson@raptorengineeringinc.com) gerrit at coreboot.org
Sun Oct 25 23:04:18 CET 2015


Timothy Pearson (tpearson at raptorengineeringinc.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/11941

-gerrit

commit e58ef559f2339a08af50e13e5525bae2ad6f4627
Author: Timothy Pearson <tpearson at raptorengineeringinc.com>
Date:   Sat Sep 5 17:55:58 2015 -0500

    northbridge/amd/amdmct: Fix broken AMD K10 DDR3 memory initialization
    
    The native AMD DDR3 memory initialization code was riddled with
    numerous errors and was missing critical configuration code segments;
    this made it so that DDR3 memory did not function on most AMD boards.
    
    This patch corrects enough of the DDR3 initialization such that
    UDIMMs can be used on most channels of G34 Opteron boards.  Further
    work is needed to fix the broken RDIMM code and remaining UDIMM issues.
    
    Change-Id: Iab690db769e820600693ad1170085623b177b94e
    Signed-off-by: Timothy Pearson <tpearson at raptorengineeringinc.com>
---
 src/northbridge/amd/amdfam10/raminit_amdmct.c   |   2 +
 src/northbridge/amd/amdmct/mct/mct_d.c          |   1 -
 src/northbridge/amd/amdmct/mct_ddr3/mct_d.c     | 177 +++++-
 src/northbridge/amd/amdmct/mct_ddr3/mct_d.h     |   8 +-
 src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h |  87 +--
 src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c  |   6 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c  | 806 +++++++++++++-----------
 src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c    |   6 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c  |  14 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c  |   3 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctproc.c   |  19 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c    |   5 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c    | 800 ++++++++++++-----------
 src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c  |  18 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c  |  13 +-
 src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c   |   7 +-
 src/northbridge/amd/amdmct/mct_ddr3/mctwl.c     |  42 +-
 src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c   | 267 ++++----
 src/northbridge/amd/amdmct/wrappers/mcti_d.c    | 110 +---
 19 files changed, 1252 insertions(+), 1139 deletions(-)

diff --git a/src/northbridge/amd/amdfam10/raminit_amdmct.c b/src/northbridge/amd/amdfam10/raminit_amdmct.c
index a0d47f4..a585fae 100644
--- a/src/northbridge/amd/amdfam10/raminit_amdmct.c
+++ b/src/northbridge/amd/amdfam10/raminit_amdmct.c
@@ -28,12 +28,14 @@ static  void print_tx(const char *strval, u32 val)
 }
 #endif
 
+#if (CONFIG_DIMM_SUPPORT & 0x000F)!=0x0005 /* not needed for AMD_FAM10_DDR3 */
 static  void print_t(const char *strval)
 {
 #if CONFIG_DEBUG_RAM_SETUP
 	printk(BIOS_DEBUG, "%s", strval);
 #endif
 }
+#endif
 
 static  void print_tf(const char *func, const char *strval)
 {
diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c
index 3dec934..88910e2 100644
--- a/src/northbridge/amd/amdmct/mct/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct/mct_d.c
@@ -542,7 +542,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
 		pDCTstat = pDCTstatA + Node;
 		devx = pDCTstat->dev_map;
 		DramSelBaseAddr = 0;
-		pDCTstat = pDCTstatA + Node;
 		if (!pDCTstat->GangedMode) {
 			DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
 			/*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
index 71a6be8..4677cc3 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
@@ -214,6 +214,8 @@ static const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11,0x2};
 static const u8 Tab_L1CLKDis[]  = {0x20, 0x20, 0x10, 0x10, 0x08, 0x08, 0x04, 0x04};
 static const u8 Tab_AM3CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};
 static const u8 Tab_S1CLKDis[]  = {0xA2, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+static const u8 Tab_C32CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};	/* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
+static const u8 Tab_G34CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};	/* Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
 static const u8 Tab_ManualCLKDis[]= {0x10, 0x04, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00};
 
 static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF};
@@ -277,6 +279,11 @@ restartinit:
 	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
 		struct DCTStatStruc *pDCTstat;
 		pDCTstat = pDCTstatA + Node;
+
+		/* Zero out data structures to avoid false detection of DIMMs */
+		memset(pDCTstat, 0, sizeof(struct DCTStatStruc));
+
+		/* Initialize data structures */
 		pDCTstat->Node_ID = Node;
 		pDCTstat->dev_host = PA_HOST(Node);
 		pDCTstat->dev_map = PA_MAP(Node);
@@ -284,17 +291,22 @@ restartinit:
 		pDCTstat->dev_nbmisc = PA_NBMISC(Node);
 		pDCTstat->NodeSysBase = node_sys_base;
 
+		printk(BIOS_DEBUG, "%s: mct_init Node %d\n", __func__, Node);
 		mct_init(pMCTstat, pDCTstat);
 		mctNodeIDDebugPort_D();
 		pDCTstat->NodePresent = NodePresent_D(Node);
 		if (pDCTstat->NodePresent) {		/* See if Node is there*/
+			printk(BIOS_DEBUG, "%s: clear_legacy_Mode\n", __func__);
 			clear_legacy_Mode(pMCTstat, pDCTstat);
 			pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
 
+			printk(BIOS_DEBUG, "%s: mct_InitialMCT_D\n", __func__);
 			mct_InitialMCT_D(pMCTstat, pDCTstat);
 
+			printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
 			mctSMBhub_Init(Node);		/* Switch SMBUS crossbar to proper node*/
 
+			printk(BIOS_DEBUG, "%s: mct_initDCT\n", __func__);
 			mct_initDCT(pMCTstat, pDCTstat);
 			if (pDCTstat->ErrCode == SC_FatalErr) {
 				goto fatalexit;		/* any fatal errors?*/
@@ -345,6 +357,7 @@ restartinit:
 
 	mct_FinalMCT_D(pMCTstat, pDCTstatA);
 	printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
+
 	return;
 
 fatalexit:
@@ -560,7 +573,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
 		pDCTstat = pDCTstatA + Node;
 		devx = pDCTstat->dev_map;
 		DramSelBaseAddr = 0;
-		pDCTstat = pDCTstatA + Node; /* ??? */
 		if (!pDCTstat->GangedMode) {
 			DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
 			/*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
@@ -645,6 +657,7 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
 		devx = pDCTstat->dev_map;
 
 		if (pDCTstat->NodePresent) {
+			printk(BIOS_DEBUG, " Copy dram map from Node 0 to Node %02x \n", Node);
 			reg = 0x40;		/*Dram Base 0*/
 			do {
 				val = Get_NB32(dev, reg);
@@ -1162,7 +1175,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
 
 	/* Program DRAM Timing values */
 	DramTimingLo = 0;	/* Dram Timing Low init */
-	val = pDCTstat->CASL - 2; /* pDCTstat.CASL to reg. definition */
+	val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */
 	DramTimingLo |= val;
 
 	val = pDCTstat->Trcd - Bias_TrcdT;
@@ -1406,18 +1419,16 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
 	else if (tCKproposed16x <= 24) {
 		pDCTstat->TargetFreq = 6;
 		tCKproposed16x = 24;
-	}
-	else if (tCKproposed16x <= 30) {
+	} else if (tCKproposed16x <= 30) {
 		pDCTstat->TargetFreq = 5;
 		tCKproposed16x = 30;
-	}
-	else {
+	} else {
 		pDCTstat->TargetFreq = 4;
 		tCKproposed16x = 40;
 	}
 	/* Running through this loop twice:
 	   - First time find tCL at target frequency
-	   - Second tim find tCL at 400MHz */
+	   - Second time find tCL at 400MHz */
 
 	for (;;) {
 		CLT_Fail = 0;
@@ -1451,7 +1462,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
 			CLT_Fail = 1;
 		/* get CL and T */
 		if (!CLT_Fail) {
-			bytex = CLactual - 2;
+			bytex = CLactual;
 			if (tCKproposed16x == 20)
 				byte = 7;
 			else if (tCKproposed16x == 24)
@@ -1632,7 +1643,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
 		val = 0x0f; /* recommended setting (default) */
 	DramConfigHi |= val << 24;
 
-	if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Bx))
+	if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx))
 		DramConfigHi |= 1 << DcqArbBypassEn;
 
 	/* Build MemClkDis Value from Dram Timing Lo and
@@ -1657,6 +1668,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
 				p = Tab_L1CLKDis;
 			else if (byte == PT_M2 || byte == PT_AS)
 				p = Tab_AM3CLKDis;
+			else if (byte == PT_C3)
+				p = Tab_C32CLKDis;
+			else if (byte == PT_GR)
+				p = Tab_G34CLKDis;
 			else
 				p = Tab_S1CLKDis;
 
@@ -2102,8 +2117,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
 				if (byte == JED_RDIMM || byte == JED_MiniRDIMM) {
 					RegDIMMPresent |= 1 << i;
 					pDCTstat->DimmRegistered[i] = 1;
-				}
-				else {
+				} else {
 					pDCTstat->DimmRegistered[i] = 0;
 				}
 				/* Check ECC capable */
@@ -2977,9 +2991,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
 		} else {	/* For Dx CPU */
 			val = 0x0CE00F00 | 1 << 29/* FlushWrOnStpGnt */;
 			if (!(pDCTstat->GangedMode))
-				val |= 0x20; /* MctWrLimit =  8 for Unganed mode */
+				val |= 0x20; /* MctWrLimit =  8 for Unganged mode */
 			else
-				val |= 0x40; /* MctWrLimit =  16 for ganed mode */
+				val |= 0x40; /* MctWrLimit =  16 for ganged mode */
 			Set_NB32(pDCTstat->dev_dct, 0x11C, val);
 
 			val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
@@ -3414,6 +3428,138 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
 			Set_NB32(dev,  0x98 + reg_off, 0x0D000030);
 			Set_NB32(dev,  0x9C + reg_off, dword);
 			Set_NB32(dev,  0x98 + reg_off, 0x4D040F30);
+
+			/* FIXME
+			 * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
+			 * For now assume a maximum of 2 DIMMs per channel can be installed
+			 */
+			uint8_t MaxDimmsInstallable = 2;
+
+			/* Obtain number of DIMMs on channel */
+			uint8_t dimm_count = pDCTstat->MAdimms[i];
+			uint8_t rank_count_dimm0;
+			uint8_t rank_count_dimm1;
+			uint32_t odt_pattern_0;
+			uint32_t odt_pattern_1;
+			uint32_t odt_pattern_2;
+			uint32_t odt_pattern_3;
+
+			/* Select appropriate ODT pattern for installed DIMMs
+			 * Refer to the BKDG Rev. 3.62, page 120 onwards
+			 */
+			if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) {
+				if (MaxDimmsInstallable == 2) {
+					if (dimm_count == 1) {
+						/* 1 DIMM detected */
+						rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+						if (rank_count_dimm1 == 1) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x00020000;
+						} else if (rank_count_dimm1 == 2) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x02080000;
+						} else if (rank_count_dimm1 == 4) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x020a0000;
+							odt_pattern_3 = 0x080a0000;
+						} else {
+							/* Fallback */
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x00000000;
+						}
+					} else {
+						/* 2 DIMMs detected */
+						rank_count_dimm0 = pDCTstat->C_DCTPtr[i]->DimmRanks[0];
+						rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+						if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x01010202;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x09030603;
+						} else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) {
+							odt_pattern_0 = 0x01010000;
+							odt_pattern_1 = 0x01010a0a;
+							odt_pattern_2 = 0x01090000;
+							odt_pattern_3 = 0x01030e0b;
+						} else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) {
+							odt_pattern_0 = 0x00000202;
+							odt_pattern_1 = 0x05050202;
+							odt_pattern_2 = 0x00000206;
+							odt_pattern_3 = 0x0d070203;
+						} else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) {
+							odt_pattern_0 = 0x05050a0a;
+							odt_pattern_1 = 0x05050a0a;
+							odt_pattern_2 = 0x050d0a0e;
+							odt_pattern_3 = 0x05070a0b;
+						} else {
+							/* Fallback */
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x00000000;
+						}
+					}
+				} else {
+					/* FIXME
+					 * 3 DIMMs per channel UNIMPLEMENTED
+					 */
+					odt_pattern_0 = 0x00000000;
+					odt_pattern_1 = 0x00000000;
+					odt_pattern_2 = 0x00000000;
+					odt_pattern_3 = 0x00000000;
+				}
+			} else {
+				if (MaxDimmsInstallable == 2) {
+					if (dimm_count == 1) {
+						/* 1 DIMM detected */
+						rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+						if (rank_count_dimm1 == 1) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x00020000;
+						} else if (rank_count_dimm1 == 2) {
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x02080000;
+						} else {
+							/* Fallback */
+							odt_pattern_0 = 0x00000000;
+							odt_pattern_1 = 0x00000000;
+							odt_pattern_2 = 0x00000000;
+							odt_pattern_3 = 0x00000000;
+						}
+					} else {
+						/* 2 DIMMs detected */
+						odt_pattern_0 = 0x00000000;
+						odt_pattern_1 = 0x01010202;
+						odt_pattern_2 = 0x00000000;
+						odt_pattern_3 = 0x09030603;
+					}
+				} else {
+					/* FIXME
+					 * 3 DIMMs per channel UNIMPLEMENTED
+					 */
+					odt_pattern_0 = 0x00000000;
+					odt_pattern_1 = 0x00000000;
+					odt_pattern_2 = 0x00000000;
+					odt_pattern_3 = 0x00000000;
+				}
+			}
+
+			/* Program ODT pattern */
+			Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1);
+			Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0);
+			Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3);
+			Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2);
 		}
 	}
 }
@@ -3657,6 +3803,7 @@ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat,
 	}
 }
 
+/* Erratum 350 */
 static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct)
 {
@@ -3692,11 +3839,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
 				mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr);	/* cache fills */
 
 				/* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */
-				Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00008000);
+				Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000);
 				mct_Wait(80); /* wait >= 300ns */
 
 				/* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */
-				Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00000000);
+				Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000);
 				mct_Wait(800); /* wait >= 2us */
 				break;
 			}
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
index d6e5fb4..987c0c8 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
@@ -499,7 +499,7 @@ struct DCTStatStruc {		/* A per Node structure*/
 		/* CHB DIMM0 Byte 0 - 7  TxDqs */
 		/* CHB DIMM1 Byte 0 - 7  TxDqs */
 		/* CHB DIMM1 Byte 0 - 7  TxDqs */
-	u8 CH_D_B_RCVRDLY[2][4][8];	/* [A/B] [DIMM0-3] [DQS] */
+	u16 CH_D_B_RCVRDLY[2][4][8];	/* [A/B] [DIMM0-3] [DQS] */
 		/* CHA DIMM 0 Receiver Enable Delay*/
 		/* CHA DIMM 1 Receiver Enable Delay*/
 		/* CHA DIMM 2 Receiver Enable Delay*/
@@ -509,7 +509,7 @@ struct DCTStatStruc {		/* A per Node structure*/
 		/* CHB DIMM 1 Receiver Enable Delay*/
 		/* CHB DIMM 2 Receiver Enable Delay*/
 		/* CHB DIMM 3 Receiver Enable Delay*/
-	u8 CH_D_BC_RCVRDLY[2][4];
+	u16 CH_D_BC_RCVRDLY[2][4];
 		/* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/
 		/* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/
 	u8 DIMMValidDCT[2];	/* DIMM# in DCT0*/
@@ -769,7 +769,7 @@ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass);
 u32 SetupDqsPattern_1PassA(u8 Pass);
 u32 SetupDqsPattern_1PassB(u8 Pass);
 u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass);
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
 void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
 void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
 uint64_t mctGetLogicalCPUID(u32 Node);
@@ -779,7 +779,7 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
 void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
 void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA);
-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
+void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
 void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel);
 void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct);
 void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
index 60f98bc..c40ea1a 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -103,10 +104,10 @@ static void proc_CLFLUSH(u32 addr_hi)
 
 	__asm__ volatile (
 		/* clflush fs:[eax] */
-		"outb %%al, $0xed\n\t"	/* _EXECFENCE */
-		 "clflush %%fs:(%0)\n\t"
+		"outb %%al, $0xed\n\t"  /* _EXECFENCE */
+		"clflush %%fs:(%0)\n\t"
 		"mfence\n\t"
-		 ::"a" (addr_hi<<8)
+		::"a" (addr_hi<<8)
 	);
 }
 
@@ -141,6 +142,24 @@ static u32 read32_fs(u32 addr_lo)
 	return value;
 }
 
+static uint64_t read64_fs(uint32_t addr_lo)
+{
+	uint64_t value = 0;
+	uint32_t value_lo;
+	uint32_t value_hi;
+
+	__asm__ volatile (
+		"outb %%al, $0xed\n\t"  /* _EXECFENCE */
+		"mfence\n\t"
+		"movl %%fs:(%2), %0\n\t"
+		"movl %%fs:(%3), %1\n\t"
+		:"=c"(value_lo), "=d"(value_hi): "a" (addr_lo), "b" (addr_lo + 4) : "memory"
+	);
+	value |= value_lo;
+	value |= ((uint64_t)value_hi) << 32;
+	return value;
+}
+
 #ifdef UNUSED_CODE
 static u8 read8_fs(u32 addr_lo)
 {
@@ -210,68 +229,6 @@ static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo)
 	);
 }
 
-static void ReadL18TestPattern(u32 addr_lo)
-{
-	/* set fs and use fs prefix to access the mem */
-	__asm__ volatile (
-		"outb %%al, $0xed\n\t"			/* _EXECFENCE */
-		"movl %%fs:-128(%%esi), %%eax\n\t" 	/* TestAddr cache line */
-		"movl %%fs:-64(%%esi), %%eax\n\t"	/* +1 */
-		"movl %%fs:(%%esi), %%eax\n\t"		/* +2 */
-		"movl %%fs:64(%%esi), %%eax\n\t"	/* +3 */
-
-		"movl %%fs:-128(%%edi), %%eax\n\t"	/* +4 */
-		"movl %%fs:-64(%%edi), %%eax\n\t"	/* +5 */
-		"movl %%fs:(%%edi), %%eax\n\t"		/* +6 */
-		"movl %%fs:64(%%edi), %%eax\n\t"	/* +7 */
-
-		"movl %%fs:-128(%%ebx), %%eax\n\t"	/* +8 */
-		"movl %%fs:-64(%%ebx), %%eax\n\t"	/* +9 */
-		"movl %%fs:(%%ebx), %%eax\n\t"		/* +10 */
-		"movl %%fs:64(%%ebx), %%eax\n\t"	/* +11 */
-
-		"movl %%fs:-128(%%ecx), %%eax\n\t"	/* +12 */
-		"movl %%fs:-64(%%ecx), %%eax\n\t"	/* +13 */
-		"movl %%fs:(%%ecx), %%eax\n\t"		/* +14 */
-		"movl %%fs:64(%%ecx), %%eax\n\t"	/* +15 */
-
-		"movl %%fs:-128(%%edx), %%eax\n\t"	/* +16 */
-		"movl %%fs:-64(%%edx), %%eax\n\t"	/* +17 */
-		"mfence\n\t"
-
-		 :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64),
-		    "d" (addr_lo +128+16*64), "S"(addr_lo+128),
-		    "D"(addr_lo+128+4*64)
-	);
-
-}
-
-static void ReadL9TestPattern(u32 addr_lo)
-{
-
-	/* set fs and use fs prefix to access the mem */
-	__asm__ volatile (
-		"outb %%al, $0xed\n\t"			/* _EXECFENCE */
-
-		"movl %%fs:-128(%%ecx), %%eax\n\t"	/* TestAddr cache line */
-		"movl %%fs:-64(%%ecx), %%eax\n\t"	/* +1 */
-		"movl %%fs:(%%ecx), %%eax\n\t"		/* +2 */
-		"movl %%fs:64(%%ecx), %%eax\n\t"	/* +3 */
-
-		"movl %%fs:-128(%%edx), %%eax\n\t"	/* +4 */
-		"movl %%fs:-64(%%edx), %%eax\n\t"	/* +5 */
-		"movl %%fs:(%%edx), %%eax\n\t"		/* +6 */
-		"movl %%fs:64(%%edx), %%eax\n\t"	/* +7 */
-
-		"movl %%fs:-128(%%ebx), %%eax\n\t"	/* +8 */
-		"mfence\n\t"
-
-		 :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128),
-		    "d"(addr_lo+128+4*64)
-	);
-
-}
-
 static void ReadMaxRdLat1CLTestPattern_D(u32 addr)
 {
 	SetUpperFSbase(addr);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
index ae1654c..99a2628 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,7 +18,7 @@
  * Foundation, Inc.
  */
 
-/* The socket type F (1207), Fr2, G (1207) are not tested.
+/* The socket type Fr2, G (1207) are not tested.
  */
 
 static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
@@ -79,8 +80,7 @@ static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload,
 			else
 				*AddrTmgCTL = 0x00353935;
 		}
-	}
-	else {
+	} else {
 		if(Speed == 4) {
 			*AddrTmgCTL = 0x00000000;
 			if (MAAdimms == 3)
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index 404727b..cc2f43a 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
 				u8 scale, u8 ChipSel);
 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 ChipSel);
-static u8 MiddleDQS_D(u8 min, u8 max);
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start);
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start);
 static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat,
 					u32 TestAddr_lo);
@@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
 					u32 addr_lo);
 static void SetTargetWTIO_D(u32 TestAddr);
 static void ResetTargetWTIO_D(void);
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat,
-					u32 TestAddr_lo);
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat, u8 ChipSel,
-					u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
 void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
 u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat);
 static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat,
 					u8 ChipSel);
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat,
-					u8 cs_start);
 u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 Channel,
 				u8 receiver, u8 *valid);
 static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat,
 				u32 *buffer);
-
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat, u8 ChipSel,
-				      u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
+static void proc_IOCLFLUSH_D(u32 addr_hi);
 
 static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
 
@@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
 	pDCTstat->DQSDelay = (u8)DQSDelay;
 }
 
+static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+	uint32_t dword;
+
+	/* Lanes 0 - 3 */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8));
+	dword &= ~0x7f7f7f7f;
+	dword |= (delay[3] & 0x7f) << 24;
+	dword |= (delay[2] & 0x7f) << 16;
+	dword |= (delay[1] & 0x7f) << 8;
+	dword |= delay[0] & 0x7f;
+	Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword);
+
+	/* Lanes 4 - 7 */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8));
+	dword &= ~0x7f7f7f7f;
+	dword |= (delay[7] & 0x7f) << 24;
+	dword |= (delay[6] & 0x7f) << 16;
+	dword |= (delay[5] & 0x7f) << 8;
+	dword |= delay[4] & 0x7f;
+	Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword);
+
+	/* Lane 8 (ECC) */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8));
+	dword &= ~0x0000007f;
+	dword |= delay[8] & 0x7f;
+	Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword);
+}
+
+static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+	uint32_t dword;
+
+	/* Lanes 0 - 3 */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8));
+	dword &= ~0x3f3f3f3f;
+	dword |= (delay[3] & 0x3f) << 24;
+	dword |= (delay[2] & 0x3f) << 16;
+	dword |= (delay[1] & 0x3f) << 8;
+	dword |= delay[0] & 0x3f;
+	Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword);
+
+	/* Lanes 4 - 7 */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8));
+	dword &= ~0x3f3f3f3f;
+	dword |= (delay[7] & 0x3f) << 24;
+	dword |= (delay[6] & 0x3f) << 16;
+	dword |= (delay[5] & 0x3f) << 8;
+	dword |= delay[4] & 0x3f;
+	Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword);
+
+	/* Lane 8 (ECC) */
+	dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8));
+	dword &= ~0x0000003f;
+	dword |= delay[8] & 0x3f;
+	Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword);
+}
+
+/* DQS Position Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3
+ */
 static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start)
+				struct DCTStatStruc *pDCTstat)
 {
 	u32 Errors;
-	u8 Channel, DQSWrDelay;
+	u8 Channel;
+	u8 Receiver;
 	u8 _DisableDramECC = 0;
-	u32 PatternBuffer[292];
+	u32 PatternBuffer[304];	/* 288 + 16 */
 	u8 _Wrap32Dis = 0, _SSE2 = 0;
-	u8 dqsWrDelay_end;
 
+	u32 dev;
 	u32 addr;
+	u8 valid;
 	u32 cr4;
 	u32 lo, hi;
+	u32 index_reg;
+	uint32_t TestAddr;
+
+	uint8_t dual_rank;
+	uint8_t iter;
+	uint8_t lane;
+	uint16_t bytelane_test_results;
+	uint16_t current_write_dqs_delay[MAX_BYTE_LANES];
+	uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
+	uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES];
+	uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64];		/* [rank][lane][step] */
+	uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128];	/* [rank][lane][step] */
+
+	uint8_t last_pos = 0;
+	uint8_t cur_count = 0;
+	uint8_t best_pos = 0;
+	uint8_t best_count = 0;
 
 	print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
 	cr4 = read_cr4();
@@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
 	SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
 
 	/* mct_BeforeTrainDQSRdWrPos_D */
-	dqsWrDelay_end = 0x20;
+
+	dev = pDCTstat->dev_dct;
+	pDCTstat->Direction = DQS_READDIR;
+
+	/* 2.8.9.9.3 (2)
+	 * Loop over each channel, lane, and rank
+	 */
+
+	/* NOTE
+	 * The BKDG originally stated to iterate over lane, then rank; however, this process is quite slow
+	 * compared to an equivalent loop over rank, then lane, as the latter allows multiple lanes to be
+	 * tested simultaneously, thus improving performance by around 8x.
+	 */
 
 	Errors = 0;
 	for (Channel = 0; Channel < 2; Channel++) {
-		print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
+		print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1);
 		pDCTstat->Channel = Channel;
 
 		if (pDCTstat->DIMMValidDCT[Channel] == 0)	/* mct_BeforeTrainDQSRdWrPos_D */
 			continue;
-		pDCTstat->DqsRdWrPos_Saved = 0;
-		for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
-			pDCTstat->DQSDelay = DQSWrDelay;
-			pDCTstat->Direction = DQS_WRITEDIR;
-			mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
-
-			print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
-			TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
-			print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2);
-			if (pDCTstat->DqsRdWrPos_Saved == 0xFF)
-				break;
-
-			print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
-			if (pDCTstat->TrainErrors == 0) {
+
+		index_reg = 0x98 + 0x100 * Channel;
+
+		dual_rank = 0;
+		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+		/* There are four receiver pairs, loosely associated with chipselects.
+		 * This is essentially looping over each rank of each DIMM.
+		 */
+		for (; Receiver < 8; Receiver++) {
+			if ((Receiver & 0x1) == 0) {
+				/* Even rank of DIMM */
+				if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
+					dual_rank = 1;
+				else
+					dual_rank = 0;
+			}
+
+			if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+				continue;
+			}
+
+			/* Select the base test address for the current rank */
+			TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
+			if (!valid) {	/* Address not supported on current CS */
+				continue;
+			}
+
+			print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4);
+			SetUpperFSbase(TestAddr);	/* fs:eax=far ptr to target */
+
+			print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2);
+
+			/* 2.8.9.9.3 (DRAM Write Data Timing Loop)
+			 * Iterate over all possible DQS delay values (0x0 - 0x7f)
+			 */
+			uint8_t test_write_dqs_delay = 0;
+			uint8_t test_read_dqs_delay = 0;
+			uint8_t passing_dqs_delay_found[MAX_BYTE_LANES];
+
+			/* Initialize variables */
+			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+				current_write_dqs_delay[lane] = 0;
+				passing_dqs_delay_found[lane] = 0;
+				write_dqs_delay_stepping_done[lane] = 0;
+			}
+
+			for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+				print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6);
+
+				/* Break out of loop once a passing window has been found for all eight data lanes */
+				if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1]
+					&& write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3]
+					&& write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5]
+					&& write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7])
 					break;
+
+				/* Commit the current Write Data Timing settings to the hardware registers */
+				write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+				/* Write the DRAM training pattern to the base test address */
+				WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+				/* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop)
+				 * Iterate over all possible DQS delay values (0x0 - 0x3f)
+				 */
+				for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) {
+					print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6);
+
+					/* Initialize Read DQS Timing Control settings for this iteration */
+					for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+						if (!write_dqs_delay_stepping_done[lane])
+							current_read_dqs_delay[lane] = test_read_dqs_delay;
+
+					/* Commit the current Read DQS Timing Control settings to the hardware registers */
+					write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+					/* Initialize test result variable */
+					bytelane_test_results = 0xff;
+
+					/* Read the DRAM training pattern from the base test address
+					 * NOTE
+					 * The BKDG states to read three times, but this is probably excessive!
+					 * Decrease training time by only reading the test pattern once per iteration
+					 */
+					for (iter = 0; iter < 1; iter++) {
+						/* Flush caches */
+						SetTargetWTIO_D(TestAddr);
+						FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+						ResetTargetWTIO_D();
+
+						/* Read and compare pattern */
+						bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+						/* If all lanes have already failed testing bypass remaining re-read attempt(s) */
+						if (bytelane_test_results == 0x0)
+							break;
+					}
+
+					/* Store the pass/fail result of each lane still being stepped for later use */
+					for (lane = 0; lane < 8; lane++)
+						if (!write_dqs_delay_stepping_done[lane])
+							dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+
+					print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6);
+				}
+
+				for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+					if (write_dqs_delay_stepping_done[lane])
+						continue;
+
+					/* Determine location and length of longest consecutive string of passing values
+					 * Output is stored in best_pos and best_count
+					 */
+					last_pos = 0;
+					cur_count = 0;
+					best_pos = 0;
+					best_count = 0;
+					for (iter = 0; iter < 64; iter++) {
+						if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) {
+							/* Pass */
+							cur_count++;
+						} else {
+							/* Failure or end of loop */
+							if (cur_count > best_count) {
+								best_count = cur_count;
+								best_pos = last_pos;
+							}
+							cur_count = 0;
+							last_pos = iter;
+						}
+					}
+
+					if (best_count > 2) {
+						/* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control
+						 * register with the center of the passing window
+						 */
+						current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+						passing_dqs_delay_found[lane] = 1;
+
+						/* Commit the current Read DQS Timing Control settings to the hardware registers */
+						write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+						/* Exit the DRAM Write Data Timing Loop */
+						write_dqs_delay_stepping_done[lane] = 1;
+
+						print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4);
+						print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4);
+					}
+
+					/* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */
+					if (!write_dqs_delay_stepping_done[lane])
+						current_write_dqs_delay[lane]++;
+				}
 			}
-			Errors |= pDCTstat->TrainErrors;
-		}
 
-		pDCTstat->DqsRdWrPos_Saved = 0;
-		if (DQSWrDelay < dqsWrDelay_end) {
-			Errors = 0;
+			/* Flag failure(s) if present */
+			for (lane = 0; lane < 8; lane++) {
+				if (!passing_dqs_delay_found[lane]) {
+					print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2);
+
+					/* Flag absence of passing window */
+					Errors |= 1 << SB_NODQSPOS;
+				}
+			}
+
+			/* Iterate over all possible Write Data Timing values (0x0 - 0x7f)
+			 * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop
+			 */
+			for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+				/* Initialize Write Data Timing settings for this iteration */
+				for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+					current_write_dqs_delay[lane] = test_write_dqs_delay;
+
+				/* Commit the current Write Data Timing settings to the hardware registers */
+				write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+				/* Write the DRAM training pattern to the base test address */
+				WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+				/* Flush caches */
+				SetTargetWTIO_D(TestAddr);
+				FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+				ResetTargetWTIO_D();
+
+				/* Read and compare pattern from the base test address */
+				bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+				/* Store the pass/fail result of each lane for later use */
+				for (lane = 0; lane < 8; lane++)
+					dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+			}
+
+			for (lane = 0; lane < 8; lane++) {
+				if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) {
+
+#ifdef PRINT_PASS_FAIL_BITMAPS
+					for (iter = 0; iter < 64; iter++) {
+						if (dqs_read_results_array[0][lane][iter])
+							printk(BIOS_DEBUG, "+");
+						else
+							printk(BIOS_DEBUG, ".");
+					}
+					printk(BIOS_DEBUG, "\n");
+					for (iter = 0; iter < 64; iter++) {
+						if (dqs_read_results_array[1][lane][iter])
+							printk(BIOS_DEBUG, "+");
+						else
+							printk(BIOS_DEBUG, ".");
+					}
+					printk(BIOS_DEBUG, "\n\n");
+					for (iter = 0; iter < 128; iter++) {
+						if (dqs_write_results_array[0][lane][iter])
+							printk(BIOS_DEBUG, "+");
+						else
+							printk(BIOS_DEBUG, ".");
+					}
+					printk(BIOS_DEBUG, "\n");
+					for (iter = 0; iter < 128; iter++) {
+						if (dqs_write_results_array[1][lane][iter])
+							printk(BIOS_DEBUG, "+");
+						else
+							printk(BIOS_DEBUG, ".");
+					}
+					printk(BIOS_DEBUG, "\n\n");
+#endif
+
+					/* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */
+					if (dual_rank) {
+						/* Intersect the passing windows of both ranks */
+						for (iter = 0; iter < 64; iter++)
+							if (!dqs_read_results_array[1][lane][iter])
+								dqs_read_results_array[0][lane][iter] = 0;
+						for (iter = 0; iter < 128; iter++)
+							if (!dqs_write_results_array[1][lane][iter])
+								dqs_write_results_array[0][lane][iter] = 0;
+					}
+
+					/* Determine location and length of longest consecutive string of passing values for read DQS timing
+					 * Output is stored in best_pos and best_count
+					 */
+					last_pos = 0;
+					cur_count = 0;
+					best_pos = 0;
+					best_count = 0;
+					for (iter = 0; iter < 64; iter++) {
+						if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) {
+							/* Pass */
+							cur_count++;
+						} else {
+							/* Failure or end of loop */
+							if (cur_count > best_count) {
+								best_count = cur_count;
+								best_pos = last_pos;
+							}
+							cur_count = 0;
+							last_pos = iter;
+						}
+					}
+					print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4);
+					if (best_count > 0) {
+						if (best_count < MIN_DQS_WNDW) {
+							/* Flag excessively small passing window */
+							Errors |= 1 << SB_SMALLDQS;
+						}
+
+						/* Find the center of the passing window */
+						current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+						/* Commit the current Read DQS Timing Control settings to the hardware registers */
+						write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+						/* Save the final Read DQS Timing Control settings for later use */
+						pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
+					} else {
+						print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2);
+
+						/* Flag absence of passing window */
+						Errors |= 1 << SB_NODQSPOS;
+					}
+
+					/* Determine location and length of longest consecutive string of passing values for write DQS timing
+					 * Output is stored in best_pos and best_count
+					 */
+					last_pos = 0;
+					cur_count = 0;
+					best_pos = 0;
+					best_count = 0;
+					for (iter = 0; iter < 128; iter++) {
+						if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) {
+							/* Pass */
+							cur_count++;
+						} else {
+							/* Failure or end of loop */
+							if (cur_count > best_count) {
+								best_count = cur_count;
+								best_pos = last_pos;
+							}
+							cur_count = 0;
+							last_pos = iter;
+						}
+					}
+					print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4);
+					if (best_count > 0) {
+						if (best_count < MIN_DQS_WNDW) {
+							/* Flag excessively small passing window */
+							Errors |= 1 << SB_SMALLDQS;
+						}
+
+						/* Find the center of the passing window */
+						current_write_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+						/* Commit the current Write Data Timing settings to the hardware registers */
+						write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+						/* Save the final Write Data Timing settings for later use */
+						pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
+					} else {
+						print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2);
+
+						/* Flag absence of passing window */
+						Errors |= 1 << SB_NODQSPOS;
+					}
+				}
+			}
 
-			print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
-			TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
 		}
-		print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
-		pDCTstat->ErrStatus |= Errors;
 	}
 
+	pDCTstat->TrainErrors |= Errors;
+	pDCTstat->ErrStatus |= Errors;
+
 #if DQS_TRAIN_DEBUG > 0
 	{
 		u8 val;
 		u8 i;
-		u8 Channel, Receiver, Dir;
+		u8 ChannelDTD, ReceiverDTD, Dir;
 		u8 *p;
 
 		for (Dir = 0; Dir < 2; Dir++) {
@@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
 			} else {
 				printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
 			}
-			for (Channel = 0; Channel < 2; Channel++) {
-				printk(BIOS_DEBUG, "Channel: %02x\n", Channel);
-				for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
-					printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver);
-					p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
+			for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
+				printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
+				for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
+					printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
+					p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
 					for (i=0;i<8; i++) {
 						val  = p[i];
-						printk(BIOS_DEBUG, "%02x ", val);
+						printk(BIOS_DEBUG, " %02x", val);
 					}
 					printk(BIOS_DEBUG, "\n");
 				}
@@ -437,225 +811,6 @@ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
 	pDCTstat->PtrPatternBufA = (u32)buf;
 }
 
-static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start)
-{
-	u32 Errors;
-	u8 ChipSel, DQSDelay;
-	u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF;
-	u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF;
-	u8 LastTest ,LastTestTot;
-	u32 TestAddr;
-	u8 ByteLane;
-	u8 MutualCSPassW[128];
-	u8 BanksPresent;
-	u8 dqsDelay_end;
-	u8 tmp, valid, tmp1;
-	u16 word;
-
-	/* MutualCSPassW: each byte represents a bitmap of pass/fail per
-	 * ByteLane.  The indext within MutualCSPassW is the delay value
-	 * given the results.
-	 */
-	print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
-
-	Errors = 0;
-	BanksPresent = 0;
-
-	dqsDelay_end = 32;
-	/* Bitmapped status per delay setting, 0xff=All positions
-	 * passing (1= PASS). Set the entire array.
-	 */
-	for (DQSDelay=0; DQSDelay<128; DQSDelay++) {
-		MutualCSPassW[DQSDelay] = 0xFF;
-	}
-
-	for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
-		print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
-
-		if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
-			print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
-			continue;
-		}
-
-		BanksPresent = 1; 	/* flag for at least one bank is present */
-		TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
-		if (!valid) {
-			print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
-			continue;
-		}
-
-		print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
-		SetUpperFSbase(TestAddr);	/* fs:eax=far ptr to target */
-
-		if (pDCTstat->Direction == DQS_READDIR) {
-			print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
-			WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
-		}
-
-		for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
-
-			tmp = 0xFF;
-			tmp1 = DQSDelay;
-			if (pDCTstat->Direction == DQS_READDIR) {
-				tmp &= MutualCSPassW[DQSDelay];
-				tmp1 += dqsDelay_end;
-			}
-			tmp &= MutualCSPassW[tmp1];
-
-			if (tmp == 0) {
-				continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */
-			}
-
-			pDCTstat->DQSDelay = DQSDelay;
-			mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
-			if (pDCTstat->Direction == DQS_WRITEDIR) {
-				print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
-				WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
-			}
-
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
-			ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
-			/* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
-			word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3);
-
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3);
-			word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */
-			word &= ~(pDCTstat->DqsRdWrPos_Saved << 8);
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3);
-
-			tmp = DQSDelay;
-			if (pDCTstat->Direction == DQS_READDIR) {
-				MutualCSPassW[tmp] &= word >> 8;
-				tmp += dqsDelay_end;
-			}
-			MutualCSPassW[tmp] &= word & 0xFF;
-
-			print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
-			SetTargetWTIO_D(TestAddr);
-			FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
-			ResetTargetWTIO_D();
-		}
-
-	}
-
-	if (pDCTstat->Direction == DQS_READDIR) {
-		dqsDelay_end <<= 1;
-	}
-
-	if (BanksPresent) {
-		#if 0		/* show the bitmap */
-		for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */
-			for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
-				if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) {
-					printk(BIOS_DEBUG, ".");
-				} else {
-					printk(BIOS_DEBUG, "*");
-				}
-			}
-			printk(BIOS_DEBUG, "\n");
-		}
-		#endif
-		for (ByteLane = 0; ByteLane < 8; ByteLane++) {
-			print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
-			if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) {
-				pDCTstat->ByteLane = ByteLane;
-				LastTest = DQS_FAIL;		/* Analyze the results */
-				LastTestTot = DQS_FAIL;
-				/* RnkDlySeqPassMin = 0; */
-				/* RnkDlySeqPassMax = 0; */
-				RnkDlyFilterMax = 0;
-				RnkDlyFilterMin = 0;
-				RnkDlyFilterMaxTot = 0;
-				RnkDlyFilterMinTot = 0;
-				for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
-					if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
-						print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
-						print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-						if (pDCTstat->Direction == DQS_READDIR)
-							tmp = 0x20;
-						else
-							tmp = 0;
-						if (DQSDelay >= tmp) {
-							RnkDlySeqPassMax = DQSDelay;
-							if (LastTest == DQS_FAIL) {
-								RnkDlySeqPassMin = DQSDelay; /* start sequential run */
-							}
-							if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
-								RnkDlyFilterMin = RnkDlySeqPassMin;
-								RnkDlyFilterMax = RnkDlySeqPassMax;
-							}
-							LastTest = DQS_PASS;
-						}
-
-						if (pDCTstat->Direction == DQS_READDIR) {
-							RnkDlySeqPassMaxTot = DQSDelay;
-							if (LastTestTot == DQS_FAIL)
-								RnkDlySeqPassMinTot = DQSDelay;
-							if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){
-								RnkDlyFilterMinTot = RnkDlySeqPassMinTot;
-								RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot;
-							}
-							LastTestTot = DQS_PASS;
-						}
-					} else {
-						LastTest = DQS_FAIL;
-						LastTestTot = DQS_FAIL;
-					}
-				}
-				print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
-				if (RnkDlySeqPassMax == 0) {
-					Errors |= 1 << SB_NODQSPOS; /* no passing window */
-				} else {
-					print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ",  RnkDlyFilterMax, 4);
-					if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
-						Errors |= 1 << SB_SMALLDQS;
-					} else {
-						u8 middle_dqs;
-						/* mctEngDQSwindow_Save_D Not required for arrays */
-						if (pDCTstat->Direction == DQS_READDIR)
-							middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot);
-						else
-							middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
-						pDCTstat->DQSDelay = middle_dqs;
-						mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start);  /* load the register with the value */
-						if (pDCTstat->Direction == DQS_READDIR)
-							StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */
-						else
-							StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */
-						print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
-						pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane;
-					}
-				}
-			}
-		} /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */
-	}
-/* skipLocMiddle: */
-	pDCTstat->TrainErrors = Errors;
-
-	print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
-}
-
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat, u8 ChipSel,
-					u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
-	pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
-		[pDCTstat->Direction]
-		[0]
-		[pDCTstat->ByteLane] = RnkDlyFilterMin;
-	pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
-		[pDCTstat->Direction]
-		[1]
-		[pDCTstat->ByteLane] = RnkDlyFilterMax;
-}
-
 static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 ChipSel)
 {
@@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
 					pDCTstat->DQSDelay;
 }
 
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat, u8 ChipSel,
-					u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
-	u8 dn;
-
-	if (pDCTstat->Direction == DQS_WRITEDIR) {
-		dn = ChipSel >> 1;
-		RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
-		RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
-		pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
-	} else {
-		RnkDlyFilterMin <<= 1;
-		RnkDlyFilterMax <<= 1;
-		pDCTstat->DQSDelay <<= 1;
-	}
-	mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax);
-	StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
-}
-
 static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 ChipSel)
 {
@@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
 
 /* FindDQSDatDimmVal_D is not required since we use an array */
 
-static u8 MiddleDQS_D(u8 min, u8 max)
-{
-	u8 size;
-	size = max-min;
-	if (size % 2)
-		size++;		/* round up if the size isn't even. */
-	return ( min + (size >> 1));
-}
-
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start)
-{
-	print_debug_dqs("\t\tTrainReadPos ", 0, 2);
-	pDCTstat->Direction = DQS_READDIR;
-	TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u8 cs_start)
-{
-	pDCTstat->Direction = DQS_WRITEDIR;
-	print_debug_dqs("\t\tTrainWritePos", 0, 2);
-	TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
 static void proc_IOCLFLUSH_D(u32 addr_hi)
 {
 	SetTargetWTIO_D(addr_hi);
@@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void)
 	_WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */
 }
 
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
-				struct DCTStatStruc *pDCTstat,
-				u32 TestAddr_lo)
-{
-	/* Read a pattern of 72 bit times (per DQ), to test dram functionality.
-	 * The pattern is a stress pattern which exercises both ISI and
-	 * crosstalk.  The number of cache lines to fill is dependent on DCT
-	 * width mode and burstlength.
-	 * Mode BL  Lines Pattern no.
-	 * ----+---+-------------------
-	 * 64	4	  9	0
-	 * 64	8	  9	0
-	 * 64M	4	  9	0
-	 * 64M	8	  9	0
-	 * 128	4	  18	1
-	 * 128	8	  N/A	-
-	 */
-	if (pDCTstat->Pattern == 0)
-		ReadL9TestPattern(TestAddr_lo);
-	else
-		ReadL18TestPattern(TestAddr_lo);
-	_MFENCE;
-}
-
 u32 SetUpperFSbase(u32 addr_hi)
 {
 	/* Set the upper 32-bits of the Base address, 4GB aligned) for the
@@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
 	Set_NB32_index_wait(dev, index_reg, index, val);
 }
 
-/* mctEngDQSwindow_Save_D not required with arrays */
-
 void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
 			struct DCTStatStruc *pDCTstatA)
 {
@@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
 	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
 		pDCTstat = pDCTstatA + Node;
 		if (pDCTstat->DCTSysLimit) {
+			TrainDQSRdWrPos_D(pMCTstat, pDCTstat);
 			for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
-				TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
 				SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
 			}
 		}
@@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
 	}
 }
 
-/*
- * mct_SetDQSDelayAllCSR_D:
- * Write the Delay value to all eight byte lanes.
- */
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat,
-					u8 cs_start)
-{
-	u8 ByteLane;
-	u8 ChipSel = cs_start;
-
-	for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
-		if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
-			for (ByteLane = 0; ByteLane < 8; ByteLane++) {
-				pDCTstat->ByteLane = ByteLane;
-				mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
-			}
-		}
-	}
-}
-
 u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat,
 				u8 Channel, u8 ChipSel)
@@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
 	reg = 0x40 + (receiver << 2) + reg_off;
 	val = Get_NB32(dev, reg);
 
-	val &= ~0x0F;
+	val &= ~0xe007c01f;
 
 	/* unganged mode DCT0+DCT1, sys addr of DCT1=node
 	 * base+DctSelBaseAddr+local ca base*/
@@ -1277,6 +1338,7 @@ exitGetAddrWNoError:
 	print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
 	print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
 	print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
+	print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2);
 	print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
 	print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);
 
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
index 528c782..60bc01d 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -25,7 +26,6 @@ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStr
 static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 static void PrepareC_DCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
-static void MultiplyDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
 static void Restore_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 static void Clear_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 
@@ -154,7 +154,6 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
 		Clear_OnDimmMirror(pMCTstat, pDCTstat);
 		SetDllSpeedUp_D(pMCTstat, pDCTstat, dct);
 		DisableAutoRefresh_D(pMCTstat, pDCTstat);
-		MultiplyDelay(pMCTstat, pDCTstat, dct);
 		for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
 			if (DIMMValid & (1 << (dimm << 1)))
 				AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass);
@@ -162,6 +161,9 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
 	}
 }
 
+/* Write Levelization Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1
+ */
 static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat)
 {
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
index 3d625de..596fb23 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -201,12 +202,13 @@ static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType)
 
 void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
 {
-/* UMA memory size may need splitting the MTRR configuration into two
-  Before training use NB_BottomIO or the physical memory size to set the MTRRs.
-  After training, add UMAMemTyping function to reconfigure the MTRRs based on
-  NV_BottomUMA (for UMA systems only).
-  This two-step process allows all memory to be cached for training
-*/
+	/* UMA memory size may require splitting the MTRR configuration into two.
+	 * Before training use NB_BottomIO or the physical memory size to set the MTRRs.
+	 * After training, add UMAMemTyping function to reconfigure the MTRRs based on
+	 * NV_BottomUMA (for UMA systems only).
+	 * This two-step process allows all memory to be cached for training.
+	 */
+
 	u32 Bottom32bIO, Cache32bTOP;
 	u32 val;
 	u32 addr;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
index 013a1b9..6f97061 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -140,7 +141,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat,
 	}
 
 	if (DoIntlv) {
-		MCTMemClr_D(pMCTstat,pDCTstatA);
+		MCTMemClr_D(pMCTstat, pDCTstatA);
 		/* Program Interleaving enabled on Node 0 map only.*/
 		MemSize0 <<= bsf(Nodes);	/* MemSize=MemSize*2 (or 4, or 8) */
 		Dct0MemSize <<= bsf(Nodes);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
index da2f372..cda9c6b 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,10 +37,10 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
 		val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78);
 
 		val &= 7;
-		val = ((~val) & 0xFF) + 1;
+		val = ((~val) & 0xff) + 1;
 		val += 6;
-		val &= 0xFF;
-		misc2 &= 0xFFF8FFFF;
+		val &= 0x7;
+		misc2 &= 0xfff8ffff;
 		misc2 |= val << 16;	/* DataTxFifoWrDly */
 		if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
 			misc2 |= 1 << 7; /* ProgOdtEn */
@@ -52,11 +53,15 @@ void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat)
 	u32 val;
 
 	if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) {
-		Set_NB32(pDCTstat->dev_dct, 0x11C, 0x0CE00FC0 | 1 << 29/* FlushWrOnStpGnt */);
+		/* Revision C */
+		Set_NB32(pDCTstat->dev_dct, 0x11c, 0x0ce00fc0 | 1 << 29/* FlushWrOnStpGnt */);
+	}
 
-		val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
-		val &= 0xFFFFF8C0;
+	if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) {
+		val = Get_NB32(pDCTstat->dev_dct, 0x1b0);
+		val &= ~0x73f;
 		val |= 0x101;	/* BKDG recommended settings */
-		Set_NB32(pDCTstat->dev_dct, 0x1B0, val);
+
+		Set_NB32(pDCTstat->dev_dct, 0x1b0, val);
 	}
 }
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
index 6de2f4e..b21b96a 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -172,6 +173,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
 			ret |= 1 << 11;
 	}
 
+	/* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */
 	if (dword & (1 << 13))
 		ret |= 1 << 12;
 
@@ -199,7 +201,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
 	/* program MrsAddress[6:4,2]=read CAS latency
 	   (CL):based on F2x[1,0]88[Tcl] */
 	dword2 = Get_NB32(dev, reg_off + 0x88);
-	ret |= (dword2 & 0xF) << 4; /* F2x88[3:0] to MrsAddress[6:4,2]=xxx0b */
+	ret |= (dword2 & 0x7) << 4;		/* F2x88[2:0] to MrsAddress[6:4] */
+	ret |= ((dword2 & 0x8) >> 3) << 2;	/* F2x88[3] to MrsAddress[2] */
 
 	/* program MrsAddress[12]=0 (PPD):slow exit */
 	if (dword & (1 << 23))
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
index 8e5c268..91e8f77 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -24,25 +25,13 @@
 
 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 Pass);
-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
-					u8 rcvrEnDly, u8 Channel,
-					u8 receiver, u8 Pass);
-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat,
-					u32 addr, u8 channel,
-					u8 pattern, u8 Pass);
 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
 					 struct DCTStatStruc *pDCTstat);
 static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 Channel);
 static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 Channel);
-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
-				u8 RcvrEnDly, u8 where,
-				u8 Channel, u8 Receiver,
-				u32 dev, u32 index_reg,
-				u8 Addl_Index, u8 Pass);
-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
 static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
 			struct DCTStatStruc *pDCTstat, u8 dct);
 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
@@ -50,17 +39,17 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
 /* Warning:  These must be located so they do not cross a logical 16-bit
    segment boundary! */
 static const u32 TestPattern0_D[] = {
-	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
-	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
-	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
-	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
-};
-static const u32 TestPattern1_D[] = {
 	0x55555555, 0x55555555, 0x55555555, 0x55555555,
 	0x55555555, 0x55555555, 0x55555555, 0x55555555,
 	0x55555555, 0x55555555, 0x55555555, 0x55555555,
 	0x55555555, 0x55555555, 0x55555555, 0x55555555,
 };
+static const u32 TestPattern1_D[] = {
+	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+};
 static const u32 TestPattern2_D[] = {
 	0x12345678, 0x87654321, 0x23456789, 0x98765432,
 	0x59385824, 0x30496724, 0x24490795, 0x99938733,
@@ -104,16 +93,87 @@ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
 		dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
 }
 
+static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+	uint8_t lane;
+	uint32_t dword;
+
+	for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+		uint32_t wdt_reg;
+		if ((lane == 0) || (lane == 1))
+			wdt_reg = 0x30;
+		if ((lane == 2) || (lane == 3))
+			wdt_reg = 0x31;
+		if ((lane == 4) || (lane == 5))
+			wdt_reg = 0x40;
+		if ((lane == 6) || (lane == 7))
+			wdt_reg = 0x41;
+		if (lane == 8)
+			wdt_reg = 0x32;
+		wdt_reg += dimm * 3;
+		dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
+		if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
+			current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
+		if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
+			current_total_delay[lane] = dword & 0x000000ff;
+	}
+}
+
+static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+	uint8_t lane;
+	uint32_t dword;
+
+	for (lane = 0; lane < 8; lane++) {
+		uint32_t ret_reg;
+		if ((lane == 0) || (lane == 1))
+			ret_reg = 0x10;
+		if ((lane == 2) || (lane == 3))
+			ret_reg = 0x11;
+		if ((lane == 4) || (lane == 5))
+			ret_reg = 0x20;
+		if ((lane == 6) || (lane == 7))
+			ret_reg = 0x21;
+		ret_reg += dimm * 3;
+		dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
+		if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
+			dword &= ~(0x1ff << 16);
+			dword |= (current_total_delay[lane] & 0x1ff) << 16;
+		}
+		if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
+			dword &= ~0x1ff;
+			dword |= current_total_delay[lane] & 0x1ff;
+		}
+		Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
+	}
+}
+
+static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
+{
+	SetUpperFSbase(testaddr);
+	testaddr <<= 8;
+
+	if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
+		testaddr += 8;	/* second channel */
+	}
+
+	return testaddr;
+}
+
+/* DQS Receiver Enable Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
+ */
 static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat, u8 Pass)
 {
-	u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
-	u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
-	u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
+	u8 Channel;
+	u8 _2Ranks;
 	u8 Addl_Index = 0;
 	u8 Receiver;
 	u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
-	u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
+	u8 Final_Value;
+	u16 CTLRMaxDelay;
+	u16 MaxDelay_CH[2];
 	u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
 	u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
 	u32 Errors;
@@ -127,9 +187,20 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 	u32 cr4;
 	u32 lo, hi;
 
+	uint32_t dword;
+	uint8_t rank;
+	uint8_t lane;
+	uint16_t current_total_delay[MAX_BYTE_LANES];
+	uint16_t candidate_total_delay[8];
+	uint8_t data_test_pass_sr[2][8];	/* [rank][lane] */
+	uint8_t data_test_pass[8];		/* [lane] */
+	uint8_t data_test_pass_prev[8];		/* [lane] */
+	uint8_t window_det_toggle[8];
+	uint8_t trained[8];
+	uint64_t result_qword1;
+	uint64_t result_qword2;
+
 	u8 valid;
-	u32 tmp;
-	u8 LastTest;
 
 	print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
 	print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
@@ -181,33 +252,103 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 
 	Errors = 0;
 	dev = pDCTstat->dev_dct;
-	CTLRMaxDelay = 0;
 
 	for (Channel = 0; Channel < 2; Channel++) {
 		print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
 		print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
 		pDCTstat->Channel = Channel;
 
+		CTLRMaxDelay = 0;
 		MaxDelay_CH[Channel] = 0;
 		index_reg = 0x98 + 0x100 * Channel;
 
 		Receiver = mct_InitReceiver_D(pDCTstat, Channel);
-		/* There are four receiver pairs, loosely associated with chipselects. */
+		/* There are four receiver pairs, loosely associated with chipselects.
+		 * This is essentially looping over each DIMM.
+		 */
 		for (; Receiver < 8; Receiver += 2) {
 			Addl_Index = (Receiver >> 1) * 3 + 0x10;
-			LastTest = DQS_FAIL;
-
-			/* mct_ModifyIndex_D */
-			RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
 
 			print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
 
-			if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+			if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
 				continue;
 			}
 
+			/* Clear data structures */
+			for (lane = 0; lane < 8; lane++) {
+				data_test_pass_prev[lane] = 0;
+				trained[lane] = 0;
+			}
+
+			/* 2.8.9.9.2 (1, 6)
+			 * Retrieve gross and fine timing fields from write DQS registers
+			 */
+			read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+
+			/* 2.8.9.9.2 (1)
+			 * Program the Write Data Timing and Write ECC Timing register to
+			 * the values stored in the DQS Write Timing Control register
+			 * for each lane
+			 */
+			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+				uint32_t wdt_reg;
+
+				/* Calculate Write Data Timing register location */
+				if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
+					wdt_reg = 0x1;
+				if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
+					wdt_reg = 0x2;
+				if (lane == 8)
+					wdt_reg = 0x3;
+				wdt_reg |= ((Receiver / 2) << 8);
+
+				/* Set Write Data Timing register values */
+				dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
+				if ((lane == 7) || (lane == 3)) {
+					dword &= ~(0x7f << 24);
+					dword |= (current_total_delay[lane] & 0x7f) << 24;
+				}
+				if ((lane == 6) || (lane == 2)) {
+					dword &= ~(0x7f << 16);
+					dword |= (current_total_delay[lane] & 0x7f) << 16;
+				}
+				if ((lane == 5) || (lane == 1)) {
+					dword &= ~(0x7f << 8);
+					dword |= (current_total_delay[lane] & 0x7f) << 8;
+				}
+				if ((lane == 8) || (lane == 4) || (lane == 0)) {
+					dword &= ~0x7f;
+					dword |= current_total_delay[lane] & 0x7f;
+				}
+				Set_NB32_index_wait(dev, index_reg, wdt_reg, dword);
+			}
+
+			/* 2.8.9.9.2 (2)
+			 * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
+			 * to 1/2 MEMCLK for all lanes
+			 */
+			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+				uint32_t rdt_reg;
+				if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
+					rdt_reg = 0x5;
+				if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
+					rdt_reg = 0x6;
+				if (lane == 8)
+					rdt_reg = 0x7;
+				rdt_reg |= ((Receiver / 2) << 8);
+				if (lane == 8)
+					dword = 0x0000003f;
+				else
+					dword = 0x3f3f3f3f;
+				Set_NB32_index_wait(dev, index_reg, rdt_reg, dword);
+			}
+
+			/* 2.8.9.9.2 (3)
+			 * Select two test addresses for each rank present
+			 */
 			TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
-			if(!valid) {	/* Address not supported on current CS */
+			if (!valid) {	/* Address not supported on current CS */
 				continue;
 			}
 
@@ -229,171 +370,214 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
 			print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
 
-			/*
-			 * Get starting RcvrEnDly value
+			/* 2.8.9.9.2 (4, 5)
+			 * Write 1 cache line of the appropriate test pattern to each test address
 			 */
-			RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
+			mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
+			mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
+			if (_2Ranks) {
+				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
+				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
+			}
 
-			/* mct_GetInitFlag_D*/
-			if (Pass == FirstPass) {
-				pDCTstat->DqsRcvEn_Pass = 0;
-			} else {
-				pDCTstat->DqsRcvEn_Pass=0xFF;
+#if DQS_TRAIN_DEBUG > 0
+			for (lane = 0; lane < 8; lane++) {
+				print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
+				print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
 			}
-			pDCTstat->DqsRcvEn_Saved = 0;
+#endif
 
+			/* 2.8.9.9.2 (6)
+			 * Write gross and fine timing fields to the DQS receiver enable timing control registers
+			 */
+			write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+
+			/* 2.8.9.9.2 (7)
+			 * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
+			 *
+			 * FIXME
+			 * It is not clear if training should be discontinued if any test failures occur in the first
+			 * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
+			 * window.  Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
+			 */
+			uint16_t current_delay_step;
 
-			while(RcvrEnDly < RcvrEnDlyLimit) {	/* sweep Delay value here */
-				print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
+			for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
+				print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);
 
-				/* callback not required
-				if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
-					goto skipDly;
+				/* 2.8.9.9.2 (7 D)
+				 * Terminate if all lanes are trained
+				 */
+				uint8_t all_lanes_trained = 1;
+				for (lane = 0; lane < 8; lane++)
+					if (!trained[lane])
+						all_lanes_trained = 0;
 
-				/* Odd steps get another pattern such that even
-				 and odd steps alternate. The pointers to the
-				 patterns will be swaped at the end of the loop
-				 so that they correspond. */
-				if(RcvrEnDly & 1) {
-					PatternA = 1;
-					PatternB = 0;
-				} else {
-					/* Even step */
-					PatternA = 0;
-					PatternB = 1;
-				}
-
-				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
-				mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
-				if(_2Ranks) {
-					mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
-					mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
-				}
-
-				mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
-
-				CurrTest = DQS_FAIL;
-				CurrTestSide0 = DQS_FAIL;
-				CurrTestSide1 = DQS_FAIL;
-
-				mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);	/*cache fills */
-				Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
-				proc_IOCLFLUSH_D(TestAddr0);
-				ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
-				print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
-
-				/* != 0x00 mean pass */
-
-				if(Test0 == DQS_PASS) {
-					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B);	/*cache fills */
-					/* ROM vs cache compare */
-					Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
-					proc_IOCLFLUSH_D(TestAddr0B);
-					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
-					print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
+				if (all_lanes_trained)
+					break;
 
-					if(Test1 == DQS_PASS) {
-						CurrTestSide0 = DQS_PASS;
+				/* 2.8.9.9.2 (7 A)
+				 * Loop over all ranks
+				 */
+				for (rank = 0; rank < (_2Ranks + 1); rank++) {
+					/* 2.8.9.9.2 (7 A a-d)
+					 * Read the first test address of the current rank
+					 * Store the first data beat for analysis
+					 * Reset read pointer in the DRAM controller FIFO
+					 * Read the second test address of the current rank
+					 * Store the first data beat for analysis
+					 * Reset read pointer in the DRAM controller FIFO
+					 */
+					if (rank & 1) {
+						/* 2.8.9.9.2 (7 D)
+						 * Invert read instructions to alternate data read order on the bus
+						 */
+						proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+						result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+						write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+						proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+						result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+						write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+					} else {
+						proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+						result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+						write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+						proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+						result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+						write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
 					}
-				}
-				if(_2Ranks) {
-					mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1);	/*cache fills */
-					/* ROM vs cache compare */
-					Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
-					proc_IOCLFLUSH_D(TestAddr1);
-					ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
-					print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
-
-					if(Test0 == DQS_PASS) {
-						mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B);	/*cache fills */
-						/* ROM vs cache compare */
-						Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
-						proc_IOCLFLUSH_D(TestAddr1B);
-						ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
-						print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
-						if(Test1 == DQS_PASS) {
-							CurrTestSide1 = DQS_PASS;
+					/* 2.8.9.9.2 (7 A e)
+					 * Compare both read patterns and flag passing ranks/lanes
+					 */
+					uint8_t result_lane_byte1;
+					uint8_t result_lane_byte2;
+					for (lane = 0; lane < 8; lane++) {
+						if (trained[lane] == 1) {
+#if DQS_TRAIN_DEBUG > 0
+							print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
+#endif
+							continue;
 						}
+
+						result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
+						result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
+						if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
+							data_test_pass_sr[rank][lane] = 1;
+						else
+							data_test_pass_sr[rank][lane] = 0;
+#if DQS_TRAIN_DEBUG > 0
+						print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, "  |  ", result_lane_byte1, 4);
+						print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, "  |  ", result_lane_byte2, 4);
+#endif
 					}
 				}
 
-				if(_2Ranks) {
-					if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
-						CurrTest = DQS_PASS;
+				/* 2.8.9.9.2 (7 B)
+				 * If DIMM is dual rank, only use delays that pass testing for both ranks
+				 */
+				for (lane = 0; lane < 8; lane++) {
+					if (_2Ranks) {
+						if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
+							data_test_pass[lane] = 1;
+						else
+							data_test_pass[lane] = 0;
+					} else {
+						data_test_pass[lane] = data_test_pass_sr[0][lane];
 					}
-				} else if (CurrTestSide0 == DQS_PASS) {
-					CurrTest = DQS_PASS;
 				}
 
-				/* record first pass DqsRcvEn to stack */
-				valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
+				/* 2.8.9.9.2 (7 E)
+				 * For each lane, update the DQS receiver delay setting in support of next iteration
+				 */
+				for (lane = 0; lane < 8; lane++) {
+					if (trained[lane] == 1)
+						continue;
+
+					/* 2.8.9.9.2 (7 C a)
+					 * Save the total delay of the first success after a failure for later use
+					 */
+					if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
+						candidate_total_delay[lane] = current_total_delay[lane];
+						window_det_toggle[lane] = 0;
+					}
 
-				/* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
-				if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
-					RcvrEnDlyRmin = RcvrEnDly;
-					break;
+					/* 2.8.9.9.2 (7 C b)
+					 * If the current delay failed testing add 1/8 UI to the current delay
+					 */
+					if (data_test_pass[lane] == 0)
+						current_total_delay[lane] += 0x4;
+
+					/* 2.8.9.9.2 (7 C c)
+					 * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
+					 * If 1.25 UI of delay have been added with no failures the lane is considered trained
+					 */
+					if (data_test_pass[lane] == 1) {
+						/* See if lane is trained */
+						if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
+							trained[lane] = 1;
+
+							/* Calculate and set final lane delay value
+							 * The final delay is the candidate delay + 7/8 UI
+							 */
+							current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
+						} else {
+							if (window_det_toggle[lane] == 0) {
+								current_total_delay[lane] += 0x1;
+								window_det_toggle[lane] = 1;
+							} else {
+								current_total_delay[lane] += 0x8;
+								window_det_toggle[lane] = 0;
+							}
+						}
+					}
 				}
 
-				LastTest = CurrTest;
-
-				/* swap the rank 0 pointers */
-				tmp = TestAddr0;
-				TestAddr0 = TestAddr0B;
-				TestAddr0B = tmp;
-
-				/* swap the rank 1 pointers */
-				tmp = TestAddr1;
-				TestAddr1 = TestAddr1B;
-				TestAddr1B = tmp;
-
-				print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
+				/* Update delays in hardware */
+				write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
 
-				RcvrEnDly++;
-
-			}	/* while RcvrEnDly */
-
-			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
-			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
-			print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
-			if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
-				/* no passing window */
-				pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
-				Errors |= 1 << SB_NORCVREN;
-				pDCTstat->ErrCode = SC_FatalErr;
+				/* Save previous results for comparison in the next iteration */
+				for (lane = 0; lane < 8; lane++)
+					data_test_pass_prev[lane] = data_test_pass[lane];
 			}
 
-			if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
-				/* passing window too narrow, too far delayed*/
-				pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
-				Errors |= 1 << SB_SmallRCVR;
-				pDCTstat->ErrCode = SC_FatalErr;
-				RcvrEnDly = RcvrEnDlyLimit - 1;
-				pDCTstat->CSTrainFail |= 1 << Receiver;
-				pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
-			}
-
-			/* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
-			mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
+#if DQS_TRAIN_DEBUG > 0
+			for (lane = 0; lane < 8; lane++)
+				print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
+#endif
 
-			mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+			/* Find highest delay value and save for later use */
+			for (lane = 0; lane < 8; lane++)
+				if (current_total_delay[lane] > CTLRMaxDelay)
+					CTLRMaxDelay = current_total_delay[lane];
 
-			if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
-				Errors |= 1 << SB_SmallRCVR;
+			/* See if any lanes failed training, and set error flags appropriately
+			 * For all trained lanes, save delay values for later use
+			 */
+			for (lane = 0; lane < 8; lane++) {
+				if (trained[lane]) {
+                        		pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
+				} else {
+					printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);
+
+					/* Set error flags */
+					pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
+					Errors |= 1 << SB_NORCVREN;
+					pDCTstat->ErrCode = SC_FatalErr;
+					pDCTstat->CSTrainFail |= 1 << Receiver;
+					pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
+				}
 			}
 
-			RcvrEnDly += Pass1MemClkDly;
-			if(RcvrEnDly > CTLRMaxDelay) {
-				CTLRMaxDelay = RcvrEnDly;
-			}
+			/* 2.8.9.9.2 (8)
+			 * Flush the receiver FIFO
+			 * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
+			 */
 
-		}	/* while Receiver */
+			WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
+			mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
+		}
 		MaxDelay_CH[Channel] = CTLRMaxDelay;
-	}	/* for Channel */
+	}
 
 	CTLRMaxDelay = MaxDelay_CH[0];
 	if (MaxDelay_CH[1] > CTLRMaxDelay)
@@ -428,31 +612,31 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
 
 #if DQS_TRAIN_DEBUG > 0
 	{
-		u8 Channel;
+		u8 ChannelDTD;
 		printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
-		for(Channel = 0; Channel<2; Channel++) {
+		for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
 			printk(BIOS_DEBUG, "Channel:%x: %x\n",
-			       Channel, pDCTstat->CH_MaxRdLat[Channel]);
+			       ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
 		}
 	}
 #endif
 
 #if DQS_TRAIN_DEBUG > 0
 	{
-		u8 val;
-		u8 Channel, Receiver;
+		u16 valDTD;
+		u8 ChannelDTD, ReceiverDTD;
 		u8 i;
-		u8 *p;
+		u16 *p;
 
 		printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
-		for(Channel = 0; Channel < 2; Channel++) {
-			printk(BIOS_DEBUG, "Channel:%x\n", Channel);
-			for(Receiver = 0; Receiver<8; Receiver+=2) {
-				printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
-				p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
+		for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
+			printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
+			for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
+				printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
+				p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
 				for (i=0;i<8; i++) {
-					val  = p[i];
-					printk(BIOS_DEBUG, "%x ", val);
+					valDTD = p[i];
+					printk(BIOS_DEBUG, " %03x", valDTD);
 				}
 				printk(BIOS_DEBUG, "\n");
 			}
@@ -475,15 +659,6 @@ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
 	}
 }
 
-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
-{
-	/*
-	 * Program final DqsRcvEnDly to additional index for DQS receiver
-	 *  enabled delay
-	 */
-	mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
-}
-
 static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
 {
 	u8 ch_end, ch;
@@ -514,17 +689,20 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
  * Function only used once so it was inlined.
  */
 
-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
+/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
+ * See BKDG Rev. 3.62 page 268 for more information
+ */
+void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
 			u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
 			u32 index_reg, u8 Addl_Index, u8 Pass)
 {
 	u32 index;
 	u8 i;
-	u8 *p;
+	u16 *p;
 	u32 val;
 
-	if(RcvrEnDly == 0xFE) {
-		/*set the boudary flag */
+	if(RcvrEnDly == 0x1fe) {
+		/*set the boundary flag */
 		pDCTstat->Status |= 1 << SB_DQSRcvLimit;
 	}
 
@@ -543,27 +721,57 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
 		val = Get_NB32_index_wait(dev, index_reg, index);
 		if(i & 1) {
 			/* odd byte lane */
-			val &= ~(0xFF << 16);
-			val |= (RcvrEnDly << 16);
+			val &= ~(0x1ff << 16);
+			val |= ((RcvrEnDly & 0x1ff) << 16);
 		} else {
 			/* even byte lane */
-			val &= ~0xFF;
-			val |= RcvrEnDly;
+			val &= ~0x1ff;
+			val |= (RcvrEnDly & 0x1ff);
 		}
 		Set_NB32_index_wait(dev, index_reg, index, val);
 	}
 
 }
 
-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
+/* Calculate MaxRdLatency
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
+ */
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
 {
 	u32 dev;
 	u32 reg;
-	u16 SubTotal;
+	u32 SubTotal;
 	u32 index_reg;
 	u32 reg_off;
 	u32 val;
-	u32 valx;
+
+	uint8_t cpu_val_n;
+	uint8_t cpu_val_p;
+
+	u16 freq_tab[] = {400, 533, 667, 800};
+
+	/* Set up processor-dependent values */
+	if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
+		/* Revision D and above */
+		cpu_val_n = 4;
+		cpu_val_p = 29;
+	} else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
+		/* Revision C */
+		uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+		if ((package_type == PT_L1)		/* Socket F (1207) */
+			|| (package_type == PT_M2)	/* Socket AM3 */
+			|| (package_type == PT_S1)) {	/* Socket S1g<x> */
+			cpu_val_n = 10;
+			cpu_val_p = 11;
+		} else {
+			cpu_val_n = 4;
+			cpu_val_p = 29;
+		}
+	} else {
+		/* Revision B and below */
+		cpu_val_n = 10;
+		cpu_val_p = 11;
+	}
 
 	if(pDCTstat->GangedMode)
 		Channel = 0;
@@ -598,49 +806,32 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
 	val = Get_NB32(dev, 0x78 + reg_off);
 	SubTotal += 8 - (val & 0x0f);
 
-	/* Convert bits 7-5 (also referred to as the course delay) of
+	/* Convert bits 8-5 (also referred to as the coarse delay) of
 	 * the current (or worst case) DQS receiver enable delay to
 	 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
 	 */
-	SubTotal += DQSRcvEnDly >> 5;	/*BOZO-no rounding up */
+	SubTotal += DQSRcvEnDly >> 5;	/* Retrieve gross delay portion of value */
 
-	/* Add 5.5 to the sub-total. 5.5 represents part of the
+	/* Add "P" (cpu_val_p / 2, in 1/2 MEMCLKs) to the sub-total. "P" represents part of the
 	 * processor specific constant delay value in the DRAM
 	 * clock domain.
 	 */
 	SubTotal <<= 1;		/*scale 1/2 MemClk to 1/4 MemClk */
-	SubTotal += 11;		/*add 5.5 1/2MemClk */
+	SubTotal += cpu_val_p;	/*add "P" 1/2MemClk */
+	SubTotal >>= 1;		/*scale 1/4 MemClk back to 1/2 MemClk */
 
 	/* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
-	 * clocks (NCLKs) as follows (assuming DDR400 and assuming
-	 * that no P-state or link speed changes have occurred).
+	 * clocks (NCLKs)
 	 */
+	SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
+	SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)];
+	SubTotal = (SubTotal + (2 - 1)) / 2;	/* Round up */
 
-	/* New formula:
-	 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
-	val = Get_NB32(dev, 0x94 + reg_off);
-
-	/* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
-	val &= 7;
-	if (val >= 3) {
-		val <<= 1;
-	} else
-		val += 3;
-	valx = val << 2;
-
-	val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
-	SubTotal *= ((val & 0x1f) + 4 ) * 3;
-
-	SubTotal /= valx;
-	if (SubTotal % valx) {	/* round up */
-		SubTotal++;
-	}
-
-	/* Add 5 NCLKs to the sub-total. 5 represents part of the
+	/* Add "N" NCLKs (cpu_val_n / 2) to the sub-total. "N" represents part of the
 	 * processor specific constant value in the northbridge
 	 * clock domain.
 	 */
-	SubTotal += 5;
+	SubTotal += (cpu_val_n) / 2;
 
 	pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
 	if(pDCTstat->GangedMode) {
@@ -659,143 +850,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
 	Set_NB32(dev, reg, val);
 }
 
-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
-			u8 rcvrEnDly, u8 Channel,
-			u8 receiver, u8 Pass)
-{
-	u8 i;
-	u8 mask_Saved, mask_Pass;
-	u8 *p;
-
-	/* calculate dimm offset
-	 * not needed for CH_D_B_RCVRDLY array
-	 */
-
-	/* cmp if there has new DqsRcvEnDly to be recorded */
-	mask_Pass = pDCTstat->DqsRcvEn_Pass;
-
-	if(Pass == SecondPass) {
-		mask_Pass = ~mask_Pass;
-	}
-
-	mask_Saved = pDCTstat->DqsRcvEn_Saved;
-	if(mask_Pass != mask_Saved) {
-
-		/* find desired stack offset according to channel/dimm/byte */
-		if(Pass == SecondPass) {
-			/* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
-			p = 0; /* Keep the compiler happy. */
-		} else {
-			mask_Saved &= mask_Pass;
-			p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
-		}
-		for(i=0; i < 8; i++) {
-			/* cmp per byte lane */
-			if(mask_Pass & (1 << i)) {
-				if(!(mask_Saved & (1 << i))) {
-					/* save RcvEnDly to stack, according to
-					the related Dimm/byte lane */
-					p[i] = (u8)rcvrEnDly;
-					mask_Saved |= 1 << i;
-				}
-			}
-		}
-		pDCTstat->DqsRcvEn_Saved = mask_Saved;
-	}
-	return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
-}
-
-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat,
-					u32 addr, u8 channel,
-					u8 pattern, u8 Pass)
-{
-	/* Compare only the first beat of data.  Since target addrs are cache
-	 * line aligned, the Channel parameter is used to determine which
-	 * cache QW to compare.
-	 */
-
-	u8 *test_buf;
-	u8 i;
-	u8 result;
-	u8 value;
-
-	if(Pass == FirstPass) {
-		if(pattern==1) {
-			test_buf = (u8 *)TestPattern1_D;
-		} else {
-			test_buf = (u8 *)TestPattern0_D;
-		}
-	} else {		/* Second Pass */
-		test_buf = (u8 *)TestPattern2_D;
-	}
-
-	SetUpperFSbase(addr);
-	addr <<= 8;
-
-	if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
-		addr += 8;	/* second channel */
-		test_buf += 8;
-	}
-
-	print_debug_dqs_pair("\t\t\t\t\t\t  test_buf = ", (u32)test_buf, "  |  addr_lo = ", addr,  4);
-	for (i=0; i<8; i++, addr ++) {
-		value = read32_fs(addr);
-		print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], "  |  ", value, 4);
-
-		if (value == test_buf[i]) {
-			pDCTstat->DqsRcvEn_Pass |= (1<<i);
-		} else {
-			pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
-		}
-	}
-
-	result = DQS_FAIL;
-
-	if (Pass == FirstPass) {
-		/* if first pass, at least one byte lane pass
-		 * ,then DQS_PASS=1 and will set to related reg.
-		 */
-		if(pDCTstat->DqsRcvEn_Pass != 0) {
-			result = DQS_PASS;
-		} else {
-			result = DQS_FAIL;
-		}
-
-	} else {
-		/* if second pass, at least one byte lane fail
-		 * ,then DQS_FAIL=1 and will set to related reg.
-		 */
-		if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
-			result = DQS_FAIL;
-		} else {
-			result = DQS_PASS;
-		}
-	}
-
-	/* if second pass, we can't find the fail until FFh,
-	 * then let it fail to save the final delay
-	 */
-	if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
-		result = DQS_FAIL;
-		pDCTstat->DqsRcvEn_Pass = 0;
-	}
-
-	/* second pass needs to be inverted
-	 * FIXME? this could be inverted in the above code to start with...
-	 */
-	if(Pass == SecondPass) {
-		if (result == DQS_PASS) {
-			result = DQS_FAIL;
-		} else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
-			result = DQS_PASS;
-		}
-	}
-
-
-	return result;
-}
-
 static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstat)
 {
@@ -854,7 +908,7 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
 	u32 index_reg;
 	u32 index;
 	u8 ChipSel;
-	u8 *p;
+	u16 *p;
 	u32 val;
 
 	dev = pDCTstat->dev_dct;
@@ -884,7 +938,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 
 	for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
 		if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
-			u8 *p;
+			u16 *p;
 			p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
 
 			/* DQS Delay Value of Data Bytelane
@@ -920,6 +974,10 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 	SetEccDQSRcvrEn_D(pDCTstat, Channel);
 }
 
+/* 2.8.9.9.4
+ * ECC Byte Lane Training
+ * DQS Receiver Enable Delay
+ */
 void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
 			struct DCTStatStruc *pDCTstatA)
 {
@@ -1017,7 +1075,9 @@ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
 		avRecValue -= 3;
 	else
 	*/
-	if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
+	if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
+		avRecValue -= 8;
+	else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
 		avRecValue -= 8;
 	else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
 		avRecValue -= 8;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
index c009756..f01e011 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,17 +37,12 @@ u32 SetupDqsPattern_1PassB(u8 pass)
 	return (u32) TestPattern0_D;
 }
 
-u8  mct_Get_Start_RcvrEnDly_1Pass(u8 pass)
-{
-	return 0;
-}
-
-static u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
+static u16 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
 					u8 Pass)
 {
-	u8 i, MaxValue;
-	u8 *p;
-	u8 val;
+	u16 i, MaxValue;
+	u16 *p;
+	u16 val;
 
 	MaxValue = 0;
 	p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
@@ -76,8 +72,8 @@ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass)
 	return ret;
 }
 
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
-				u8 RcvrEnDly, u8 RcvrEnDlyLimit,
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+				u16 RcvrEnDly, u16 RcvrEnDlyLimit,
 				u8 Channel, u8 Receiver, u8 Pass)
 
 {
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
index b01889d..796febc 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -74,15 +75,15 @@ u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
 	return RcvrEnDly;
 }
 
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
-				u8 RcvrEnDly, u8 RcvrEnDlyLimit,
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+				u16 RcvrEnDly, u16 RcvrEnDlyLimit,
 				u8 Channel, u8 Receiver, u8 Pass)
 {
 	u8 i;
-	u8 *p;
-	u8 *p_1;
-	u8 val;
-	u8 val_1;
+	u16 *p;
+	u16 *p_1;
+	u16 val;
+	u16 val_1;
 	u8 valid = 1;
 	u8 bn;
 
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
index ea5c8c7..920f514 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -191,10 +192,10 @@ static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat,
 
 #if DQS_TRAIN_DEBUG > 0
 	{
-		u8 Channel;
+		u8 ChannelDTD;
 		printk(BIOS_DEBUG, "maxRdLatencyTrain: CH_MaxRdLat:\n");
-		for(Channel = 0; Channel<2; Channel++) {
-			printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]);
+		for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
+			printk(BIOS_DEBUG, "Channel: %02x: %02x\n", ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
 		}
 	}
 #endif
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
index cdeae49..1c3e322 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -58,9 +59,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
 	pDCTstat->C_DCTPtr[dct]->LogicalCPUID = pDCTstat->LogicalCPUID;
 
 	for (dimm = 0; dimm < MAX_DIMMS; dimm++) {
-		if (DimmValid & (1 << dimm))
+		if (DimmValid & (1 << (dimm << 1)))
 			pDCTstat->C_DCTPtr[dct]->DimmPresent[dimm] = 1;
-		if (Dimmx8Present & (1 << dimm))
+		if (Dimmx8Present & (1 << (dimm << 1)))
 			pDCTstat->C_DCTPtr[dct]->DimmX8Present[dimm] = 1;
 	}
 
@@ -88,9 +89,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
 		u8  DimmRanks;
 		if (DimmValid & (1 << (dimm << 1))) {
 			DimmRanks = 1;
-			if (pDCTstat->DimmDRPresent & (1 << (dimm+dct)))
+			if (pDCTstat->DimmDRPresent & (1 << ((dimm << 1) + dct)))
 				DimmRanks = 2;
-			else if (pDCTstat->DimmQRPresent & (1 << (dimm+dct)))
+			else if (pDCTstat->DimmQRPresent & (1 << ((dimm << 1) + dct)))
 				DimmRanks = 4;
 		} else
 			DimmRanks = 0;
@@ -249,35 +250,6 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
 	}
 }
 
-/* Multiply the previously saved delay values in Pass 1, step #5 by
-   (target frequency)/400 to find the gross and fine delay initialization
-   values at the target frequency.
- */
-void MultiplyDelay(struct MCTStatStruc *pMCTstat,
-					struct DCTStatStruc *pDCTstat, u8 dct)
-{
-	u16 index;
-	u8 Multiplier;
-	u8 gross, fine;
-	u16 total;
-
-	Multiplier = pDCTstat->TargetFreq;
-
-	for (index=0; index < MAX_BYTE_LANES*MAX_LDIMMS; index ++) {
-		gross = pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index];
-		fine = pDCTstat->C_DCTPtr[dct]->WLFineDelay[index];
-
-		total = gross << 5 | fine;
-		total *= Multiplier;
-		if (total % 3)
-			total = total / 3 + 1;
-		else
-			total = total / 3;
-		pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index] = (total & 0xFF) >> 5;
-		pDCTstat->C_DCTPtr[dct]->WLFineDelay[index] = total & 0x1F;
-	}
-}
-
 /*
  * the DRAM controller to bring the DRAMs out of self refresh mode.
  */
@@ -352,9 +324,9 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
 
 		if (!DCT1Present)
 			pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[0];
-		else if (pDCTstat->GangedMode) {
+		else if (pDCTstat->GangedMode)
 			pDCTstat->CSPresent = 0;
-		} else
+		else
 			pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1];
 
 		FreqChgCtrlWrd(pMCTstat, pDCTstat);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
index 8e7e70c..397fd77 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
@@ -2,6 +2,7 @@
  * This file is part of the coreboot project.
  *
  * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson at raptorengineeringinc.com>, Raptor Engineering
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -235,6 +236,65 @@ u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue)
 	return MRSValue;
 }
 
+static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
+{
+	uint16_t term;	/* Rtt_Nom code handed back to the caller, which ORs it into the MRS word */
+
+	/* FIXME
+	 * Mainboards need to be able to specify the maximum number of DIMMs installable per channel.
+	 * For now assume at most 2 DIMMs per channel, so the rank_count/rank branch below is never taken.
+	 */
+	uint8_t MaxDimmsInstallable = 2;
+
+	if (number_of_dimms == 1) {	/* callers in this patch pass pDCTData->MaxDimmsInstalled */
+		if (MaxDimmsInstallable < 3) {
+			term = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
+		} else {	/* only reachable once MaxDimmsInstallable is raised to 3 or more */
+			if (rank_count == 1) {
+				term = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
+			} else {
+				if (rank == 0)
+					term = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
+				else
+					term = 0x00;	/* Rtt_Nom=OFF */
+			}
+		}
+	} else {	/* any DIMM count other than 1 */
+		if (frequency_index < 5)	/* presumably MemClkFreq codes 3/4 (400/533 MHz) - TODO confirm */
+			term = 0x0044;	/* Rtt_Nom=RZQ/6=40 Ohm */
+		else
+			term = 0x0204;	/* Rtt_Nom=RZQ/8=30 Ohm */
+	}
+
+	return term;
+}
+
+static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
+{
+	uint16_t term;	/* Rtt_WR code handed back to the caller, which ORs it into the MRS word */
+
+	/* FIXME
+	 * Mainboards need to be able to specify the maximum number of DIMMs installable per channel.
+	 * For now assume at most 2 DIMMs per channel; frequency_index and rank are not read here.
+	 */
+	uint8_t MaxDimmsInstallable = 2;
+
+	if (number_of_dimms == 1) {	/* callers in this patch pass pDCTData->MaxDimmsInstalled */
+		if (MaxDimmsInstallable < 3) {
+			term = 0x00;	/* Rtt_WR=off */
+		} else {	/* only reachable once MaxDimmsInstallable is raised to 3 or more */
+			if (rank_count == 1)
+				term = 0x00;	/* Rtt_WR=off */
+			else
+				term = 0x200;	/* Rtt_WR=RZQ/4=60 Ohm */
+		}
+	} else {	/* any DIMM count other than 1 */
+		term = 0x400;	/* Rtt_WR=RZQ/2=120 Ohm */
+	}
+
+	return term;
+}
+
 /*-----------------------------------------------------------------------------
  *  void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *DCTData, u8 Dimm, BOOL WL)
  *
@@ -295,48 +355,23 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
 		if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
 			tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
 		} else {
-			if (wl)
-			{
-				if (pDCTData->MaxDimmsInstalled == 1)
-				{
-					if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 0))
-					{
-						tempW1 = 0x00;	/* Rtt_Nom=OFF */
-					}
+			if (wl) {
+				if (rank == 0) {
+					/* Get Rtt_WR for the current DIMM and rank */
+					uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
+
+					/* Convert dynamic termination code to corresponding nominal termination code */
+					if (dynamic_term == 0x200)
+						tempW1 = 0x04;
+					else if (dynamic_term == 0x400)
+						tempW1 = 0x40;
 					else
-					{
-						tempW1 = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
-					}
-				}
-				else	/* 2 Dimms or more per channel */
-				{
-					if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 1))
-					{
-						tempW1 = 0x00;	/* Rtt_Nom=OFF */
-					}
-					else
-					{
-						if (MemClkFreq == 6) {
-							tempW1 = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
-						} else {
-							tempW1 = 0x40;/* Rtt_Nom=RZQ/2=120 Ohm */
-						}
-					}
-				}
-			}
-			else {	/* 1 or 4 Dimms per channel */
-				if ((pDCTData->MaxDimmsInstalled == 1) || (pDCTData->MaxDimmsInstalled == 4))
-				{
-					tempW1 = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
-				}
-				else	/* 2 or 3 Dimms per channel */
-				{
-					if (MemClkFreq < 5) {
-						tempW1 = 0x0044;	/* Rtt_Nom=RZQ/6=40 Ohm */
-					} else {
-						tempW1 = 0x0204;	/* Rtt_Nom=RZQ/8=30 Ohm */
-					}
+						tempW1 = 0x0;
+				} else {
+					tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
 				}
+			} else {
+				tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
 			}
 		}
 		tempW=tempW|tempW1;
@@ -353,20 +388,22 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
 			else
 			{
 				/* Disable the output drivers of all other ranks for
-				 * the target DIMM. */
+				 * the target DIMM.
+				 */
 				tempW = bitTestSet(tempW1, Qoff);
 			}
 		}
-		/* program MrsAddress[5,1]=output driver impedance control (DIC):
-		 * based on F2x[1,0]84[DrvImpCtrl] */
+		/* Program MrsAddress[5,1]=output driver impedance control (DIC):
+		 * based on F2x[1,0]84[DrvImpCtrl]
+		 */
 		tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
 				FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
-		if (bitTest(tempW1,1))
-		{tempW = bitTestSet(tempW, 5);}
-		if (bitTest(tempW1,0))
-		{tempW = bitTestSet(tempW, 1);}
+		if (bitTest(tempW1, 1))
+			tempW = bitTestSet(tempW, 5);
+		if (bitTest(tempW1, 0))
+			tempW = bitTestSet(tempW, 1);
 
-		tempW = swapAddrBits_wl(pDCTData,tempW);
+		tempW = swapAddrBits_wl(pDCTData, tempW);
 
 		set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
 			DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
@@ -404,29 +441,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
 		if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
 			tempW+=0x8;
 		/* determine Rtt_WR for WL & Normal mode */
-		if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+		if (pDCTData->Status[DCT_STATUS_REGISTERED])
 			tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
-		} else {
-			if (wl)
-			{
-				tempW1 = 0x00;	/* Rtt_WR=off */
-			}
-			else
-			{
-				if (pDCTData->MaxDimmsInstalled == 1)
-				{
-					tempW1 = 0x00;	/* Rtt_WR=off */
-				}
-				else
-				{
-					if (MemClkFreq == 6) {
-						tempW1 = 0x200;	/* Rtt_WR=RZQ/4=60 Ohm */
-					} else {
-						tempW1 = 0x400;	/* Rtt_WR=RZQ/2 */
-					}
-				}
-			}
-		}
+		else
+			tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
 		tempW=tempW|tempW1;
 		tempW = swapAddrBits_wl(pDCTData,tempW);
 		set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
@@ -483,38 +501,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
 					}
 
 					/* determine Rtt_Nom for WL & Normal mode */
-					if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+					if (pDCTData->Status[DCT_STATUS_REGISTERED])
 						tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
-					} else {
-						if (wl)
-						{
-							if ((pDCTData->DimmRanks[currDimm] == 2) && (rank == 1))
-							{
-								tempW1 = 0x00;	/* Rtt_Nom=OFF */
-							}
-							else
-							{
-								if (MemClkFreq < 5) {
-									tempW1 = 0x0044;/* Rtt_Nom=RZQ/6=40 Ohm */
-								} else {
-									tempW1 = 0x0204;/* Rtt_Nom=RZQ/8=30 Ohm */
-								}
-							}
-						}
-						else {	/* 1 or 4 Dimms per channel */
-							if (pDCTData->MaxDimmsInstalled == 4)
-							{
-								tempW1 = 0x04;	/* Rtt_Nom=RZQ/4=60 Ohm */
-							}
-							else {	/* 2 or 3 Dimms per channel */
-								if (MemClkFreq < 5) {
-									tempW1 = 0x0044;	/* Rtt_Nom=RZQ/6=40 Ohm */
-								} else {
-									tempW1 = 0x0204;	/* Rtt_Nom=RZQ/8=30 Ohm */
-								}
-							}
-						}
-					}
+					else
+						tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
 					tempW=tempW|tempW1;
 					/* program MrsAddress[5,1]=output driver impedance control (DIC):
 					 * based on F2x[1,0]84[DrvImpCtrl] */
@@ -560,22 +550,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
 					if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
 						tempW+=0x8;
 					/* determine Rtt_WR for WL & Normal mode */
-					if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+					if (pDCTData->Status[DCT_STATUS_REGISTERED])
 						tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
-					} else {
-						if (wl)
-						{
-							tempW1 = 0x00;	/* Rtt_WR=off */
-						}
-						else
-						{
-							if (MemClkFreq == 6) {
-								tempW1 = 0x200;	/* Rtt_WR=RZQ/4=60 Ohm */
-							} else {
-								tempW1 = 0x400;	/* Rtt_WR=RZQ/2 */
-							}
-						}
-					}
+					else
+						tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
 					tempW=tempW|tempW1;
 					tempW = swapAddrBits_wl(pDCTData,tempW);
 					set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
@@ -646,9 +624,14 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm)
  */
 void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
 {
-	u8 ByteLane, Seed_Gross, Seed_Fine;
+	u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq;
 	u32 Value, Addr;
 	u16 Addl_Data_Offset, Addl_Data_Port;
+	u16 freq_tab[] = {400, 533, 667, 800};
+
+	/* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
+	MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+				FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
 
 	/* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the
 	 * current memory subsystem configuration.
@@ -656,12 +639,13 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
 	programODT(pMCTData, pDCTData, dimm);
 
 	/* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */
-	if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
+	if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) {
 		set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
 				DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1);
+	}
 	else
 	{
-		/* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B*/
+		/* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */
 		if (pDCTData->DctTrain)
 		{
 			Addl_Data_Offset=0x198;
@@ -687,7 +671,6 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
 
 	/* Wait 10 MEMCLKs to allow for ODT signal settling. */
 	pMCTData->AgesaDelay(10);
-	ByteLane = 0;
 	if (pass == 1)
 	{
 		if (pDCTData->Status[DCT_STATUS_REGISTERED])
@@ -705,10 +688,17 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
 		}
 		else
 		{
-			Seed_Gross = 0x00;
-			Seed_Fine = 0x1A;
+			if (MemClkFreq == 6) {
+				/* DDR-800 */
+				Seed_Gross = 0x00;
+				Seed_Fine = 0x1a;
+			} else {
+				/* Use settings for DDR-400 (interpolated from BKDG) */
+				Seed_Gross = 0x00;
+				Seed_Fine = 0x0d;
+			}
 		}
-		while(ByteLane < MAX_BYTE_LANES)
+		for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
 		{
 			/* Program an initialization value to registers F2x[1, 0]9C_x[51:50] and
 			 * F2x[1, 0]9C_x52 to set the gross and fine delay for all the byte lane fields
@@ -720,35 +710,32 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
 			 */
 			pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
 			pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
-			ByteLane++;
 		}
-	} else if (pDCTData->Status[DCT_STATUS_REGISTERED]) {		/* For Pass 2 */
+	} else { 		/* Pass 2 */
 		/* From BKDG, Write Leveling Seed Value. */
-		/* TODO: The unbuffered DIMMs are unstable on the code below. So temporarily it is
-		 * only for registered DIMMs. */
 		u32 RegisterDelay, SeedTotal;
-		u8 MemClkFreq;
-		u16 freq_tab[] = {400, 533, 667, 800};
-		while(ByteLane < MAX_BYTE_LANES)
+		for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
 		{
-			MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
-					      FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
 			if (pDCTData->Status[DCT_STATUS_REGISTERED])
 				RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
 			else
 				RegisterDelay = 0;
-			SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1F) |
-				pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5;
+			SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+				(pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
 			/* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
 			   training) - RegisterDelay. */
-			/* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
-			SeedTotal = (u16) (RegisterDelay + ((((u32) SeedTotal - RegisterDelay) *
-							     freq_tab[MemClkFreq-3]) / 400));
-			Seed_Gross = (SeedTotal & 0x20) != 0 ? 1 : 2;
-			Seed_Fine = SeedTotal & 0x1F;
+			SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
+								freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100)));
+			Seed_Gross = SeedTotal / 32;
+			Seed_Fine = SeedTotal & 0x1f;
+			if (Seed_Gross == 0)
+				Seed_Gross = 0;
+			else if (Seed_Gross & 0x1)
+				Seed_Gross = 1;
+			else
+				Seed_Gross = 2;
 			pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
 			pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
-			ByteLane ++;
 		}
 	}
 
diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
index ea32893..c00cf24 100644
--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
@@ -59,6 +59,10 @@ static u16 mctGet_NVbits(u8 index)
 		val = 1;
 #elif CONFIG_CPU_SOCKET_TYPE == 0x13	/* ASB2 */
 		val = 4;
+#elif CONFIG_CPU_SOCKET_TYPE == 0x14	/* C32 */
+		val = 5;
+#elif CONFIG_CPU_SOCKET_TYPE == 0x15	/* G34 */
+		val = 3;
 //#elif SYSTEM_TYPE == MOBILE
 //		val = 2;
 #endif
@@ -297,6 +301,8 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
 	/* Determine the number of installed DIMMs */
 	int ch1_count = 0;
 	int ch2_count = 0;
+	uint8_t ch1_registered = 0;
+	uint8_t ch2_registered = 0;
 	int i;
 	for (i = 0; i < 15; i = i + 2) {
 		if (pDCTstat->DIMMValid & (1 << i))
@@ -304,6 +310,12 @@ static void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat)
 		if (pDCTstat->DIMMValid & (1 << (i + 1)))
 			ch2_count++;
 	}
+	for (i = 0; i < MAX_DIMMS_SUPPORTED; i = i + 2) {
+		if (pDCTstat->DimmRegistered[i])
+			ch1_registered = 1;
+		if (pDCTstat->DimmRegistered[i + 1])
+			ch2_registered = 1;
+	}
 	if (IS_ENABLED(CONFIG_DEBUG_RAM_SETUP)) {
 		printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 1: %d DIMM(s) detected\n", ch1_count);
 		printk(BIOS_DEBUG, "mctGet_MaxLoadFreq: Channel 2: %d DIMM(s) detected\n", ch2_count);
@@ -413,101 +425,6 @@ static void mctHookAfterDramInit(void)
 }
 
 #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
-static void coreDelay(u32 microseconds)
-{
-	msr_t now;
-	msr_t end;
-	u32 cycles;
-
-	/* delay ~40us
-	   This seems like a hack to me...
-	   It would be nice to have a central delay function. */
-
-	cycles = (microseconds * 100) << 3;  /* x8 (number of 1.25ns ticks) */
-
-        if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) {
-            msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
-            if (!(rdmsr(0xC0010064+pstate_msr.lo).lo & NB_DID_M_ON)) {
-	      cycles = cycles <<1; // half freq, double cycles
-	    }
-	} // else should we keep p0 freq at the time of setting TSC_FREQ_SEL_MASK somewhere and check it here ?
-
-	now = rdmsr(TSC_MSR);
-        // avoid overflow when called near 2^32 ticks ~ 5.3 s boundaries
-	if (0xffffffff - cycles >= now.lo ) {
-	  end.hi =  now.hi;
-          end.lo = now.lo + cycles;
-	} else {
-          end.hi = now.hi +1; //
-          end.lo = cycles - (1+(0xffffffff - now.lo));
-	}
-	do {
-          now = rdmsr(TSC_MSR);
-        } while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo)));
-}
-
-/* Erratum 350 */
-static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
-{
-	u8 u8Channel;
-	u8 u8Receiver;
-	u32 u32Addr;
-	u8 u8Valid;
-	u32 u32DctDev;
-
-	// 1. dummy read for each installed DIMM */
-	for (u8Channel = 0; u8Channel < 2; u8Channel++) {
-		// This will be 0 for vaild DIMMS, eles 8
-		u8Receiver = mct_InitReceiver_D(pDCTstat, u8Channel);
-
-		for (; u8Receiver < 8; u8Receiver += 2) {
-			u32Addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, u8Channel, u8Receiver, &u8Valid);
-
-			if(!u8Valid) {	/* Address not supported on current CS */
-				print_t("vErrata350: Address not supported on current CS\n");
-				continue;
-			}
-			print_t("vErrata350: dummy read \n");
-			read32_fs(u32Addr);
-		}
-	}
-
-	print_t("vErrata350: step 2a\n");
-
-	/* 2. Write 0000_8000h to register F2x[1, 0]9C_xD080F0C. */
-	u32DctDev = pDCTstat->dev_dct;
-	Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00008000);
-	/*                                                ^--- value
-	                                        ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG.
-	                                 ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
-
-	if(!pDCTstat->GangedMode) {
-		print_t("vErrata350: step 2b\n");
-		Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00008000);
-		/*                                                ^--- value
-		                                        ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG
-		                                ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
-	}
-
-	print_t("vErrata350: step 3\n");
-	/* 3. Wait at least 300 nanoseconds. */
-	coreDelay(1);
-
-	print_t("vErrata350: step 4\n");
-	/* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */
-	Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00000000);
-
-	if(!pDCTstat->GangedMode) {
-		print_t("vErrata350: step 4b\n");
-		Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00000000);
-	}
-
-	print_t("vErrata350: step 5\n");
-	/* 5. Wait at least 2 microseconds. */
-	coreDelay(2);
-
-}
-
 static void vErratum372(struct DCTStatStruc *pDCTstat)
 {
         msr_t msr = rdmsr(NB_CFG_MSR);
@@ -546,8 +463,7 @@ static void mctHookBeforeAnyTraining(struct MCTStatStruc *pMCTstat, struct DCTSt
 {
 #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
   /* FIXME :  as of 25.6.2010 errata 350 and 372 should apply to  ((RB|BL|DA)-C[23])|(HY-D[01])|(PH-E0) but I don't find constants for all of them */
-	if (pDCTstatA->LogicalCPUID & AMD_DRBH_Cx) {
-		vErrata350(pMCTstat, pDCTstatA);
+	if (pDCTstatA->LogicalCPUID & (AMD_DRBH_Cx | AMD_DR_Dx)) {
 		vErratum372(pDCTstatA);
 		vErratum414(pDCTstatA);
 	}



More information about the coreboot-gerrit mailing list