[coreboot-gerrit] Change in coreboot[master]: [WIP]nb/intel/x4x/raminit: Implement read and write DQ DQS t...
Arthur Heymans (Code Review)
gerrit at coreboot.org
Wed May 24 22:10:38 CEST 2017
Arthur Heymans has uploaded a new change for review. ( https://review.coreboot.org/19879 )
Change subject: [WIP]nb/intel/x4x/raminit: Implement read and write DQ DQS training
......................................................................
[WIP]nb/intel/x4x/raminit: Implement read and write DQ DQS training
This is not DDR3 specific.
Change-Id: I806840445b5e768d079910fb9870a2cee7b9f1ca
Signed-off-by: Arthur Heymans <arthur at aheymans.xyz>
---
M src/northbridge/intel/x4x/dq_dqsl_dll.c
M src/northbridge/intel/x4x/raminit_ddr23.c
M src/northbridge/intel/x4x/x4x.h
3 files changed, 462 insertions(+), 1 deletion(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/79/19879/1
diff --git a/src/northbridge/intel/x4x/dq_dqsl_dll.c b/src/northbridge/intel/x4x/dq_dqsl_dll.c
index a1dea68..b7697d4 100644
--- a/src/northbridge/intel/x4x/dq_dqsl_dll.c
+++ b/src/northbridge/intel/x4x/dq_dqsl_dll.c
@@ -437,3 +437,447 @@
printk(BIOS_DEBUG, "Done write levelling.\n");
}
+
+/*
+ * Data pattern shared by the write (DQ) and read (DQS) training routines.
+ * The words are stored to DRAM one by one and compared byte by byte after
+ * being read back. Every word repeats one byte value four times.
+ */
+static const u32 write_training_schedule[] = {
+	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
+	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
+	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
+	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
+	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
+	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
+	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
+	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
+	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
+	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
+	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
+	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
+	0x03030303, 0x04040404, 0x09090909, 0x10101010,
+	0x21212121, 0x40404040, 0x81818181, 0x00000000,
+	0x03030303, 0x04040404, 0x09090909, 0x10101010,
+	0x21212121, 0x40404040, 0x81818181, 0x00000000,
+	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
+	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
+	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
+	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
+};
+
+/* Number of pattern words the training loops write and check */
+#define PATTERN_SIZE 80
+
+/*
+ * The training loops index write_training_schedule[] up to PATTERN_SIZE, so
+ * refuse to build if the table and the constant ever get out of sync.
+ */
+_Static_assert(sizeof(write_training_schedule)
+		/ sizeof(write_training_schedule[0]) == PATTERN_SIZE,
+	"PATTERN_SIZE must match the number of write_training_schedule entries");
+
+/*
+ * Write the training pattern to DRAM at 'address' and check what reads back.
+ *
+ * Pattern word n is stored twice: at u32 index 8 * n and at u32 index
+ * 8 * n + 4 from 'address'. For every word the whole pattern is rewritten
+ * first, then the two copies of that word are read back into an 8-byte
+ * buffer and each byte is compared with the matching byte of the pattern.
+ *
+ * 's' and 'channel' are not used here.
+ *
+ * Returns a bitmask with bit n set when byte n of the buffer mismatched for
+ * at least one pattern word; 0xff is returned as soon as all eight did.
+ */
+static int test_dq_aligned(struct sysinfo *s, u8 channel, u32 address)
+{
+	u8 error_found = 0, lane, count;
+	u8 data[8];
+	u16 i;
+	u32 content;
+
+	for (i = 0; i < PATTERN_SIZE; i++) {
+		for (count = 0; count < PATTERN_SIZE; count++) {
+			/* NOTE(review): purpose of this register write is not visible here */
+			if ((count % 16) == 0)
+				MCHBAR32(0xf90) = 1;
+			content = write_training_schedule[count];
+			write32((u32 *)address + 8 * count, content);
+			write32((u32 *)address + 8 * count + 4, content);
+		}
+
+		content = write_training_schedule[i];
+		write32(&data[0], read32((u32 *)address + 8 * i));
+		write32(&data[4], read32((u32 *)address + 8 * i + 4));
+		for (lane = 0; lane < 8; lane++) {
+			/*
+			 * data[n] holds byte (n % 4) of the word that was read
+			 * (little-endian store), so compare against that byte
+			 * rather than always against the lowest one.
+			 */
+			if (data[lane] != ((content >> ((lane % 4) * 8)) & 0xff))
+				error_found |= (1 << lane);
+		}
+		/* No need to continue now */
+		if (error_found == 0xff)
+			return 0xff;
+	}
+	return error_found;
+}
+
+/* Values for the 'direction' argument of the limit searches */
+#define SUCCEEDING 0
+#define FAILING 1
+/* Extra steps a lane has to keep showing the wanted result */
+#define CONSISTENCY 10
+
+/*
+ * This function finds either failing or succeeding writes by increasing DQ.
+ * When it has found a failing or succeeding setting it will increase DQ
+ * another 10 times to make sure the result is consistent.
+ * This means that the middle between failing and succeeding writes is shifted
+ * by 9 steps, which need to be subtracted later.
+ *
+ * dq_setting: per-lane DLL settings to start from; advanced in place.
+ * dq_lim:     per-lane step counters; incremented once for every step taken.
+ * direction:  SUCCEEDING to look for working writes, FAILING for broken ones.
+ *
+ * Returns 1 when searching for SUCCEEDING writes and increment_dqs() reports
+ * an error for any lane, 0 otherwise. When searching for FAILING writes such
+ * an error only ends the search.
+ */
+static int find_dq_limit(struct sysinfo *s, u8 channel, u32 address,
+			struct dll_setting *dq_setting, u8 *dq_lim,
+			u8 direction)
+{
+	int status;
+	u8 lane_passes[8] = { 0 };
+	u8 sample = 0xff;
+	u8 lane;
+	u8 lane_err;
+
+	for (lane = 0; lane < 8; lane++)
+		dqset(channel, lane, &dq_setting[lane]);
+
+	while (sample) {
+		status = 0;
+		/* TODO: This is probably more readable if done one lane at the time */
+		lane_err = test_dq_aligned(s, channel, address);
+		/* From here on a set bit means: lane does not show the wanted result */
+		lane_err ^= 0xff * direction;
+		for (lane = 0; lane < 8; lane++) {
+			const int wanted = !(lane_err & (1 << lane));
+
+			if (wanted && lane_passes[lane] >= CONSISTENCY) {
+				/* This lane is done */
+				sample &= ~(1 << lane);
+				continue;
+			}
+
+			/*
+			 * reuse function for DQ DLL settings.
+			 * Remember a failure of any lane: a later lane that
+			 * can still be incremented must not hide it.
+			 */
+			if (increment_dqs(s, &dq_setting[lane]))
+				status = 1;
+			dqset(channel, lane, &dq_setting[lane]);
+			dq_lim[lane]++;
+			if (wanted)
+				lane_passes[lane]++;
+		}
+		if (status) {
+			if (direction == 0) {
+				printk(BIOS_ERR, "Could not find good Write training settings\n");
+				return 1;
+			} else {
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+/* Log the eight per-lane step counts in 'steps' for 'channel' */
+static void print_dq_steps(u8 channel, const u8 *steps)
+{
+	u8 lane;
+
+	for (lane = 0; lane < 8; lane++) {
+		printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n",
+			channel, lane, steps[lane]);
+	}
+}
+
+/*
+ * Search the DQ window of one rank. Both searches count steps from the same
+ * DLL table entry, so results of different ranks can be compared directly.
+ *
+ * address: DRAM address used for the test accesses of this rank.
+ * rank:    rank number, only used in the log output.
+ * lower:   out, per lane (8 entries) steps until writes succeed.
+ * upper:   out, per lane (8 entries) steps until writes fail again.
+ *
+ * Returns 0 on success, -1 when find_dq_limit() reports an error.
+ */
+static int find_dq_window(struct sysinfo *s, u8 channel, u32 address, int rank,
+			u8 *lower, u8 *upper)
+{
+	struct dll_setting dq_setting[8];
+	u8 lane;
+
+	/* Start from DQS settings */
+	for (lane = 0; lane < 8; lane++)
+		dq_setting[lane] = ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane]; /* TODO per freq */
+
+	memset(lower, 0, 8);
+	if (find_dq_limit(s, channel, address, dq_setting, lower,
+				SUCCEEDING)) {
+		printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n");
+		return -1;
+	}
+	printk(BIOS_DEBUG, "Lower Limit for DQ on rank %d:\n", rank);
+	print_dq_steps(channel, lower);
+
+	/* The upper limit search carries on from where the lower one stopped */
+	memcpy(upper, lower, 8);
+	if (find_dq_limit(s, channel, address, dq_setting, upper,
+				FAILING)) {
+		printk(BIOS_CRIT, "Could not find failing upper limit DQ setting\n");
+		return -1;
+	}
+	printk(BIOS_DEBUG, "Upper Limit for DQ on rank %d:\n", rank);
+	print_dq_steps(channel, upper);
+
+	return 0;
+}
+
+/*
+ * Increase DQ until writes succeed, then further increase DQ until it fails.
+ * Use the middle between this working lower limit and this failing upper
+ * limit.
+ *
+ * This is done for every populated channel. When both rank 0 and rank 2 are
+ * populated the window is narrowed to what works on both. The centered
+ * setting is stored in s->dq_settings and programmed with dqset().
+ *
+ * Returns 0 on success, -1 when no window could be found.
+ */
+int do_write_training(struct sysinfo *s)
+{
+	int i;
+	u8 channel, lane;
+	u32 address;
+	u8 dq_lower[8];
+	u8 dq_upper[8];
+	u8 dq_lower_r2[8];
+	u8 dq_upper_r2[8];
+	u8 dq_center[8];
+
+	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
+		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
+		/* Address of the first populated rank of this channel */
+		address = 0x20000000 * channel;
+		for (i = 0; (i < RANKS_PER_CHANNEL)
+			     && !RANK_IS_POPULATED(s->dimms, channel, i); i++)
+			address += 128 * MiB;
+
+		for (lane = 0; lane < 8; lane++)
+			s->dq_settings[channel][lane] =
+				ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane];
+
+		if (find_dq_window(s, channel, address, 0, dq_lower, dq_upper))
+			return -1;
+
+		if (RANK_IS_POPULATED(s->dimms, channel, 0)
+			&& RANK_IS_POPULATED(s->dimms, channel, 2)) {
+			address += 256 * MiB;
+
+			if (find_dq_window(s, channel, address, 2,
+						dq_lower_r2, dq_upper_r2))
+				return -1;
+
+			/* Keep only what works on both ranks */
+			for (lane = 0; lane < 8; lane++) {
+				dq_upper[lane] = MIN(dq_upper[lane], dq_upper_r2[lane]);
+				dq_lower[lane] = MAX(dq_lower[lane], dq_lower_r2[lane]);
+			}
+		} /* If Rank 2 is present */
+
+		/* Undo the shift introduced by the consistency check */
+		for (lane = 0; lane < 8; lane++) {
+			dq_lower[lane] -= 9;
+			dq_upper[lane] -= 9;
+		}
+
+		for (lane = 0; lane < 8; lane++)
+			dq_center[lane] = (dq_upper[lane] + dq_lower[lane]) / 2;
+
+		printk(BIOS_DEBUG, "Centered values for DQ DLL:\n");
+		print_dq_steps(channel, dq_center);
+
+		/* Reset DQ DLL settings and increment with centered value */
+		for (lane = 0; lane < 8; lane++) {
+			s->dq_settings[channel][lane] =
+				ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane];
+			for (i = 0; i < dq_center[lane]; i++)
+				increment_dqs(s, &s->dq_settings[channel][lane]);
+			dqset(channel, lane, &s->dq_settings[channel][lane]);
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Program the read training DQS setting of one lane.
+ *
+ * The tap value goes into bits 3:0 of the byte register at MCHBAR offset
+ * 0x540 + 0x400 * channel + 4 * lane, the pi value into bits 2:0 of the byte
+ * register two bytes further. All other bits of both registers are kept.
+ * The requested and the read-back values are logged at RAM_SPEW level.
+ */
+static void rt_set_dqs(u8 channel, u8 lane, struct rt_dqs_setting *dqs_setting)
+{
+	const u32 tap_reg = 0x540 + 0x400 * channel + lane * 4;
+	const u32 pi_reg = 0x542 + 0x400 * channel + lane * 4;
+	u8 tap_val = MCHBAR8(tap_reg);
+	u8 pi_val = MCHBAR8(pi_reg);
+
+	printk(RAM_SPEW, "RT DQS: ch%d, L%d, %d.%d, ", channel, lane,
+		dqs_setting->tap,
+		dqs_setting->pi);
+
+	tap_val = (tap_val & ~0xf) | dqs_setting->tap;
+	MCHBAR8(tap_reg) = tap_val;
+
+	pi_val = (pi_val & ~0x7) | dqs_setting->pi;
+	MCHBAR8(pi_reg) = pi_val;
+
+	/* Show what the hardware actually holds now */
+	printk(RAM_SPEW, " %d.%d\n",
+		MCHBAR8(tap_reg) & 0xf,
+		MCHBAR8(pi_reg) & 0x7);
+}
+
+/*
+ * Advance a read training DQS setting by one step: pi counts from 0 to 7 and
+ * then wraps to 0 while tap moves up by one, up to tap 14.
+ *
+ * Returns 0 on success, -1 when the setting is already at tap 14, pi 7; the
+ * setting is left untouched in that case.
+ */
+static int rt_increment_dqs(struct rt_dqs_setting *setting)
+{
+	const int pi_at_max = !(setting->pi < 7);
+
+	if (pi_at_max && !(setting->tap < 14))
+		return -1;
+
+	if (pi_at_max) {
+		/* Carry into the next tap */
+		setting->pi = 0;
+		setting->tap++;
+	} else {
+		setting->pi++;
+	}
+	return 0;
+}
+
+/*
+ * Read back the training pattern stored at 'address' and check it.
+ *
+ * Pattern word n is expected twice: at u32 index 8 * n and at u32 index
+ * 8 * n + 4 from 'address'. Nothing is written to DRAM here, the caller has
+ * to store the pattern beforehand. 's' and 'channel' are not used.
+ *
+ * Returns a bitmask with bit n set when byte n of the 8 bytes read mismatched
+ * for at least one pattern word. Checking stops once all eight bits are set.
+ */
+static u8 test_dqs_aligned(struct sysinfo *s, u8 channel, u32 address)
+{
+	u8 error_lane = 0;
+	u8 data8[8];
+	u32 expected;
+	int i, lane;
+
+	for (i = 0; i < PATTERN_SIZE; i++) {
+		expected = write_training_schedule[i];
+		write32(&data8[0], read32((u32 *)address + i * 8));
+		write32(&data8[4], read32((u32 *)address + i * 8 + 4));
+		for (lane = 0; lane < 8; lane++) {
+			/*
+			 * data8[n] holds byte (n % 4) of the word that was read
+			 * (little-endian store), so compare against that byte
+			 * rather than always against the lowest one.
+			 */
+			if (data8[lane] != ((expected >> ((lane % 4) * 8)) & 0xff))
+				error_lane |= (1 << lane);
+		}
+		/* No need to continue now */
+		if (error_lane == 0xff)
+			break;
+	}
+	return error_lane;
+}
+
+/*
+ * Step the read training DQS setting of every lane until reads show the
+ * result selected by 'direction' (SUCCEEDING or FAILING).
+ *
+ * dqs_setting: per-lane settings to start from; advanced in place.
+ * dqs_lim:     per-lane step counters; incremented for every step taken.
+ *
+ * A lane is ticked off the first time it shows the wanted result, or when it
+ * cannot be stepped any further while looking for FAILING reads. Lanes that
+ * were ticked off are still checked and stepped on later passes, since they
+ * can be found bad again.
+ *
+ * Returns 0 once every lane is ticked off, -1 when a lane runs out of steps
+ * while looking for SUCCEEDING reads.
+ */
+static int rt_find_dqs_limit(struct sysinfo *s, u8 channel, u32 address,
+			struct rt_dqs_setting *dqs_setting, u8 *dqs_lim,
+			u8 direction)
+{
+	const u8 invert = 0xff * direction;
+	u8 pending = 0xff;
+	u8 todo;
+	int l;
+
+	for (l = 0; l < 8; l++)
+		rt_set_dqs(channel, l, &dqs_setting[l]);
+
+	while (pending != 0) {
+		/* TODO: This is probably more readable if done one lane at the time */
+		/* A set bit in todo: lane does not show the wanted result */
+		todo = test_dqs_aligned(s, channel, address) ^ invert;
+
+		for (l = 0; l < 8; l++) {
+			const u8 bit = 1 << l;
+
+			if (!(todo & bit)) {
+				pending &= ~bit;
+				continue;
+			}
+
+			if (rt_increment_dqs(&dqs_setting[l]) == 0) {
+				dqs_lim[l]++;
+				rt_set_dqs(channel, l, &dqs_setting[l]);
+				continue;
+			}
+
+			/* Out of steps for this lane */
+			if (direction == SUCCEEDING) {
+				printk(BIOS_CRIT, "Could not find RT DQS setting\n");
+				return -1;
+			}
+			pending &= ~bit;
+		}
+	}
+	return 0;
+}
+
+/* Number of times the read training is repeated and averaged */
+#define RT_LOOPS 3
+
+/* Log the eight per-lane step counts in 'steps' for 'channel' */
+static void rt_print_dqs_steps(u8 channel, const u8 *steps)
+{
+	int lane;
+
+	for (lane = 0; lane < 8; lane++) {
+		printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n",
+			channel, lane, steps[lane]);
+	}
+}
+
+/*
+ * Search the read DQS window of one rank, counting steps from a zeroed
+ * setting.
+ *
+ * address: DRAM address of this rank; the training pattern is stored there
+ *          first, in the layout test_dqs_aligned() reads back.
+ * rank:    rank number, only used in the log output.
+ * lower:   out, per lane (8 entries) steps until reads succeed.
+ * upper:   out, per lane (8 entries) steps until reads fail again.
+ *
+ * Returns 0 on success, -1 when rt_find_dqs_limit() reports an error.
+ */
+static int rt_find_dqs_window(struct sysinfo *s, u8 channel, u32 address,
+			int rank, u8 *lower, u8 *upper)
+{
+	struct rt_dqs_setting dqs_setting[8];
+	u32 content;
+	int i;
+
+	/* Write pattern to strobe address */
+	for (i = 0; i < PATTERN_SIZE; i++) {
+		content = write_training_schedule[i];
+		write32((u32 *)address + 8 * i, content);
+		write32((u32 *)address + 8 * i + 4, content);
+	}
+
+	memset(dqs_setting, 0, sizeof(dqs_setting));
+	memset(lower, 0, 8);
+	if (rt_find_dqs_limit(s, channel, address, dqs_setting, lower,
+				SUCCEEDING)) {
+		printk(BIOS_CRIT, "Could not find working lower limit DQS setting\n");
+		return -1;
+	}
+	printk(BIOS_DEBUG, "Lower Limit for DQS on rank %d:\n", rank);
+	rt_print_dqs_steps(channel, lower);
+
+	/* The upper limit search carries on from where the lower one stopped */
+	memcpy(upper, lower, 8);
+	if (rt_find_dqs_limit(s, channel, address, dqs_setting, upper,
+				FAILING)) {
+		printk(BIOS_CRIT, "Could not find failing upper limit DQS setting\n");
+		return -1;
+	}
+	printk(BIOS_DEBUG, "Upper Limit for DQS on rank %d:\n", rank);
+	rt_print_dqs_steps(channel, upper);
+
+	return 0;
+}
+
+/*
+ * Increase DQS until reads succeed, then further increase DQS until they fail.
+ * Use the middle between this working lower limit and this failing upper
+ * limit.
+ * To improve statistics this is done RT_LOOPS times, while adding up the
+ * centered values. At the end the sums are divided by RT_LOOPS.
+ *
+ * When both rank 0 and rank 2 of a channel are populated the window is
+ * narrowed to what works on both. The result is stored in s->rt_dqs_setting
+ * and programmed with rt_set_dqs().
+ *
+ * Returns 0 on success, -1 when no window could be found.
+ */
+int do_read_training(struct sysinfo *s)
+{
+	int loop, channel, i, lane;
+	u32 address;
+	u8 dqs_lower[8];
+	u8 dqs_upper[8];
+	u8 dqs_lower_r2[8];
+	u8 dqs_upper_r2[8];
+	u16 saved_dqs[2][8] = { { 0 } };
+	u16 steps;
+
+	for (loop = 0; loop < RT_LOOPS; loop++) {
+		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
+			printk(BIOS_DEBUG, "Doing DQS read training on CH%d\n", channel);
+			/*
+			 * Address of the first populated rank of this channel.
+			 * The bound is checked before the rank is looked up.
+			 */
+			address = 0x20000000 * channel;
+			for (i = 0; i < RANKS_PER_CHANNEL
+				     && !RANK_IS_POPULATED(s->dimms, channel, i); i++)
+				address += 128 * MiB;
+
+			if (rt_find_dqs_window(s, channel, address, 0,
+						dqs_lower, dqs_upper))
+				return -1;
+
+			if (RANK_IS_POPULATED(s->dimms, channel, 0)
+				&& RANK_IS_POPULATED(s->dimms, channel, 2)) {
+				address += 256 * MiB;
+
+				if (rt_find_dqs_window(s, channel, address, 2,
+							dqs_lower_r2, dqs_upper_r2))
+					return -1;
+
+				/* Keep only what works on both ranks */
+				for (lane = 0; lane < 8; lane++) {
+					dqs_upper[lane] = MIN(dqs_upper[lane], dqs_upper_r2[lane]);
+					dqs_lower[lane] = MAX(dqs_lower[lane], dqs_lower_r2[lane]);
+				}
+			} /* end If Rank 2 is present */
+
+			for (lane = 0; lane < 8; lane++)
+				saved_dqs[channel][lane] +=
+					(dqs_upper[lane] + dqs_lower[lane]) / 2;
+		} /* END FOR_EACH_POPULATED_CHANNEL */
+	} /* end RT_LOOPS */
+
+	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
+		for (lane = 0; lane < 8; lane++) {
+			steps = saved_dqs[channel][lane] / RT_LOOPS;
+			/*
+			 * The step counts were measured from a zeroed setting,
+			 * so apply them to a zeroed setting as well. As only
+			 * successful increments were counted, the increments
+			 * below cannot run out of range.
+			 */
+			s->rt_dqs_setting[channel][lane].tap = 0;
+			s->rt_dqs_setting[channel][lane].pi = 0;
+			while (steps--)
+				rt_increment_dqs(&s->rt_dqs_setting[channel][lane]);
+			rt_set_dqs(channel, lane, &s->rt_dqs_setting[channel][lane]);
+		}
+	}
+	return 0;
+}
diff --git a/src/northbridge/intel/x4x/raminit_ddr23.c b/src/northbridge/intel/x4x/raminit_ddr23.c
index 9e7583c..ded519e 100644
--- a/src/northbridge/intel/x4x/raminit_ddr23.c
+++ b/src/northbridge/intel/x4x/raminit_ddr23.c
@@ -2363,9 +2363,14 @@
// XXX tRD
- // XXX Write training
+ // Write training
+ if (do_write_training(s) && (s->selected_timings.mem_clk > MEM_CLOCK_667MHz))
+ die("Write training Failed!\n");
// XXX Read training
+ if (do_read_training(s) && (s->selected_timings.mem_clk > MEM_CLOCK_667MHz))
+ die("Read training Failed!\n");
+
// DRADRB
dradrb_ddr2(s);
diff --git a/src/northbridge/intel/x4x/x4x.h b/src/northbridge/intel/x4x/x4x.h
index c2eea38..188654b 100644
--- a/src/northbridge/intel/x4x/x4x.h
+++ b/src/northbridge/intel/x4x/x4x.h
@@ -250,6 +250,11 @@
u8 coarse;
};
+struct rt_dqs_setting { /* read training DQS setting of one lane, see rt_set_dqs() */
+ u8 tap; /* written to bits 3:0 of the lane's register; stepped up to 14 by rt_increment_dqs() */
+ u8 pi; /* written to bits 2:0 of the lane's register; counts 0..7, then carries into tap */
+};
+
struct timings {
unsigned int CAS;
unsigned int tclk;
@@ -292,6 +297,9 @@
struct dimminfo dimms[4];
u8 spd_map[4];
u8 nmode;
+ struct dll_setting dqs_settings[2][8];
+ struct dll_setting dq_settings[2][8];
+ struct rt_dqs_setting rt_dqs_setting[2][8];
};
#define BOOT_PATH_NORMAL 0
#define BOOT_PATH_WARM_RESET 1
@@ -357,8 +365,12 @@
int ddr3_save_dimminfo(u8 dimm_idx, u8 *raw_spd,
struct abs_timings *saved_timings, struct sysinfo *s);
void send_jedec_cmd(const struct sysinfo *s, u8 r, u8 ch, u8 cmd, u32 val);
+void search_write_leveling(struct sysinfo *s);
void dqsset(u8 ch, u8 lane, const struct dll_setting *setting);
void dqset(u8 ch, u8 lane, const struct dll_setting *setting);
+void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose);
+int do_write_training(struct sysinfo *s);
+int do_read_training(struct sysinfo *s);
/* Look up tables Tables */
extern const struct dll_setting ddr2_dll_setting_667[23];
--
To view, visit https://review.coreboot.org/19879
To unsubscribe, visit https://review.coreboot.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I806840445b5e768d079910fb9870a2cee7b9f1ca
Gerrit-PatchSet: 1
Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-Owner: Arthur Heymans <arthur at aheymans.xyz>
More information about the coreboot-gerrit
mailing list