Arthur Heymans has uploaded a new change for review. ( https://review.coreboot.org/19879 )
Change subject: [WIP]nb/intel/x4x/raminit: Implement read and write DQ DQS training
......................................................................
[WIP]nb/intel/x4x/raminit: Implement read and write DQ DQS training
This is not DDR3 specific.
Change-Id: I806840445b5e768d079910fb9870a2cee7b9f1ca
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
---
M src/northbridge/intel/x4x/dq_dqsl_dll.c
M src/northbridge/intel/x4x/raminit_ddr23.c
M src/northbridge/intel/x4x/x4x.h
3 files changed, 462 insertions(+), 1 deletion(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/79/19879/1
diff --git a/src/northbridge/intel/x4x/dq_dqsl_dll.c b/src/northbridge/intel/x4x/dq_dqsl_dll.c index a1dea68..b7697d4 100644 --- a/src/northbridge/intel/x4x/dq_dqsl_dll.c +++ b/src/northbridge/intel/x4x/dq_dqsl_dll.c @@ -437,3 +437,447 @@
printk(BIOS_DEBUG, "Done write levelling.\n"); } + +static const u32 write_training_schedule[] = { + 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, + 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, + 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, + 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, + 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, + 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, + 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, + 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, + 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, + 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, + 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, + 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, + 0x03030303, 0x04040404, 0x09090909, 0x10101010, + 0x21212121, 0x40404040, 0x81818181, 0x00000000, + 0x03030303, 0x04040404, 0x09090909, 0x10101010, + 0x21212121, 0x40404040, 0x81818181, 0x00000000, + 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, + 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, + 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, + 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, +}; + +#define PATTERN_SIZE 80 + +static int test_dq_aligned(struct sysinfo *s, u8 channel, u32 address) +{ + u8 error_found = 0, lane, count; + u8 data[8]; + u16 i; + u32 content; + + for (i = 0; i < PATTERN_SIZE; i++) { + for (count = 0; count < PATTERN_SIZE; count++) { + if ((count % 16) == 0) + MCHBAR32(0xf90) = 1; + content = write_training_schedule[count]; + write32((u32 *)address + 8 * count, content); + write32((u32 *)address + 8 * count + 4, content); + } + + write32(&data[0], read32((u32 *)address + 8 * i)); + write32(&data[4], read32((u32 *)address + 8 * i + 4)); + for (lane = 0; lane < 8; lane++) { + if (data[lane] != (write_training_schedule[i] & 0xff)) + error_found |= (1 << lane); + } + /* No need to continue now */ + if (error_found == 0xff) + return 0xff; + } + return error_found; +} + +#define SUCCEEDING 0 +#define FAILING 1 +#define CONSISTENCY 10 + +/* + * This 
function finds either failing or succeeding writes by increasing DQ. + * When it has found a failing or succeeding setting it will increase DQ + * another 10 times to make sure the result is consistent. + * This means that the middle between failing and succeeding writes is shifted + * by 9 steps, which need to be subtracted later. + */ +static int find_dq_limit(struct sysinfo *s, u8 channel, u32 address, + struct dll_setting *dq_setting, u8 *dq_lim, + u8 direction) +{ + int status; + u8 lane_passes[8] = { }; + u8 sample = 0xff; + u8 lane; + u8 lane_err; + + for (lane = 0; lane < 8; lane++) + dqset(channel, lane, &dq_setting[lane]); + + while(sample) { + status = 0; + /* TODO: This is probably more readable if done one lane at the time */ + lane_err = test_dq_aligned(s, channel, address); + lane_err ^= 0xff * direction; + for (lane = 0; lane < 8; lane++) { + if (lane_err & (1 << lane)) { + /* reuse function for DQ DLL settings */ + status = increment_dqs(s, &dq_setting[lane]); + dqset(channel, lane, &dq_setting[lane]); + dq_lim[lane]++; + } else if (lane_passes[lane] < CONSISTENCY) { + status = increment_dqs(s, &dq_setting[lane]); + dqset(channel, lane, &dq_setting[lane]); + dq_lim[lane]++; + lane_passes[lane]++; + } else if (lane_passes[lane] == CONSISTENCY) { + sample &= ~(1 << lane); + } + } + if (status) { + if (direction == 0) { + printk(BIOS_ERR, "Could not find good Write training settings\n"); + return 1; + } else { + break; + } + } + } + return 0; +} + +/* + * Increase DQ until writes succeed, then further increase DQ until it fails. + * Use the middle between this working lower limit and this failing upper + * limit. 
+ */ +int do_write_training(struct sysinfo *s) +{ + int i; + u8 channel, lane; + u32 address; + u8 dq_lower_r0[8]; + u8 dq_upper_r0[8]; + u8 dq_lower_r2[8]; + u8 dq_upper_r2[8]; + u8 dq_center[8]; + struct dll_setting dq_setting[8]; + u8 dq_average; + u32 dq_absolute; + + FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { + printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel); + address = 0x20000000 * channel; + for (i = 0; (i < RANKS_PER_CHANNEL) && !RANK_IS_POPULATED(s->dimms, channel, i); i++) + address += 128 * MiB; + + dq_average = 0; + dq_absolute = 0; + memset(dq_lower_r0, 0, sizeof(dq_lower_r0)); + memset(dq_lower_r2, 0, sizeof(dq_lower_r2)); + + memset(dq_center, 0, sizeof(dq_center)); + for (lane = 0; lane < 8; lane++) { + /* Start from DQS settings */ + s->dq_settings[channel][lane] = ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane]; + dq_setting[lane] = ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane]; /* TODO per freq */ + /* dq_setting[lane] = s->dqs_settings[channel][lane]; */ + } + + if (find_dq_limit(s, channel, address, dq_setting, dq_lower_r0, + SUCCEEDING)) { + printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Lower Limit for DQ on rank 0:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dq_lower_r0[lane]); + } + + memcpy(dq_upper_r0, dq_lower_r0, sizeof(dq_upper_r0)); + if (find_dq_limit(s, channel, address, dq_setting, dq_upper_r0, + FAILING)) { + printk(BIOS_CRIT, "Could not find failing upper limit DQ setting\n"); + return -1; + } + + printk(BIOS_DEBUG, "Upper Limit for DQ on rank 0:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dq_upper_r0[lane]); + } + + if (RANK_IS_POPULATED(s->dimms, channel, 0) + && RANK_IS_POPULATED(s->dimms, channel, 2)) { + address += 256 * MiB; + + for (lane = 0; lane < 8; lane++) + dq_setting[lane] = 
s->dqs_settings[channel][lane]; /* TODO */ + + if (find_dq_limit(s, channel, address, dq_setting, dq_lower_r2, + SUCCEEDING)) { + printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Lower Limit for DQ on rank 2:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dq_lower_r2[lane]); + } + + memcpy(dq_upper_r2, dq_lower_r2, sizeof(dq_upper_r2)); + if (find_dq_limit(s, channel, address, dq_setting, dq_upper_r2, + FAILING)) { + printk(BIOS_CRIT, "Could not find failing upper limit DQ setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Upper Limit for DQ on rank 2:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dq_upper_r2[lane]); + } + + for (lane = 0; lane < 8; lane++) { + dq_upper_r0[lane] = MIN(dq_upper_r0[lane], dq_upper_r2[lane]); + dq_lower_r0[lane] = MAX(dq_lower_r0[lane], dq_lower_r2[lane]); + } + } /* If Rank 2 is present */ + + for (lane = 0; lane < 8; lane++) { + dq_lower_r0[lane] -= 9; + dq_upper_r0[lane] -= 9; + } + + for (lane = 0; lane < 8; lane++) + dq_center[lane] = (dq_upper_r0[lane] + dq_lower_r0[lane]) / 2; + + printk(BIOS_DEBUG, "Centered values for DQ DLL:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dq_center[lane]); + } + + /* Reset DQ DLL settings and increment with centered value*/ + for (lane = 0; lane < 8; lane++) { + dq_setting[lane] = ddr3_dll_setting_800[s->nmode - 1][DQS1 + lane]; + for (i = 0; i < dq_center[lane]; i++) + increment_dqs(s, &s->dq_settings[channel][lane]); + dqset(channel, lane, &s->dq_settings[channel][lane]); + } + } + return 0; +} + + +static void rt_set_dqs(u8 channel, u8 lane, struct rt_dqs_setting *dqs_setting) +{ + u8 saved_tap = MCHBAR8(0x540 + 0x400 * channel + lane * 4); + u8 saved_pi = MCHBAR8(0x542 + 0x400 * channel + lane * 4); + printk(RAM_SPEW, "RT DQS: ch%d, L%d, 
%d.%d, ", channel, lane, + dqs_setting->tap, + dqs_setting->pi); + + saved_tap &= ~0xf; + saved_tap |= dqs_setting->tap; + MCHBAR8(0x540 + 0x400 * channel + lane * 4) = saved_tap; + + saved_pi &= ~0x7; + saved_pi |= dqs_setting->pi; + MCHBAR8(0x542 + 0x400 * channel + lane * 4) = saved_pi; + printk(RAM_SPEW, " %d.%d\n", + MCHBAR8(0x540 + 0x400 * channel + lane * 4) & 0xf, + MCHBAR8(0x542 + 0x400 * channel + lane * 4) & 0x7); +} + +static int rt_increment_dqs(struct rt_dqs_setting *setting) +{ + if (setting->pi < 7) { + setting->pi++; + } else if (setting->tap < 14) { + setting->pi = 0; + setting->tap++; + } else { + return -1; + } + return 0; +} + +static u8 test_dqs_aligned(struct sysinfo *s, u8 channel, u32 address) +{ + u8 error_lane = 0; + u8 data8[8]; + int i, lane; + + for (i = 0; i < PATTERN_SIZE; i++) { + write32(&data8[0], read32((u32 *)address + i * 8)); + write32(&data8[4], read32((u32 *)address + i * 8 + 4)); + for (lane = 0; lane < 8; lane++) { + if (data8[lane] != (write_training_schedule[i] & 0xff)) + error_lane |= (1 << lane); + } + /* No need to continue now */ + if (error_lane == 0xff) + break; + + } + return error_lane; +} + +static int rt_find_dqs_limit(struct sysinfo *s, u8 channel, u32 address, + struct rt_dqs_setting *dqs_setting, u8 *dqs_lim, + u8 direction) +{ + int lane; + u8 sample = 0xff, lane_err; + + for (lane = 0; lane < 8; lane++) + rt_set_dqs(channel, lane, &dqs_setting[lane]); + + while(sample) { + /* TODO: This is probably more readable if done one lane at the time */ + lane_err = test_dqs_aligned(s, channel, address); + lane_err ^= 0xff * direction; + for (lane = 0; lane < 8; lane++) { + /* Checking lanes that have already been done is a good idea */ + /* since those can be found bad again. 
*/ + if (lane_err & (1 << lane)) { + if (rt_increment_dqs(&dqs_setting[lane])) { + if (direction == SUCCEEDING) { + printk(BIOS_CRIT, "Could not find RT DQS setting\n"); + return -1; + } else { + sample &= ~(1 << lane); + continue; + } + } + dqs_lim[lane]++; + rt_set_dqs(channel, lane, &dqs_setting[lane]); + } else { + sample &= ~(1 << lane); + } + } + } + return 0; +} + +#define RT_LOOPS 3 + +/* + * Increase DQS until reads succeed, then further increase DQS until they fail. + * Use the middle between this working lower limit and this failing upper + * limit. + * To improve statistics this is done RT_LOOPS times, while adding + * the centered values to some saved values. At the end the saved values are divided + * by RT_LOOPS. + */ +int do_read_training(struct sysinfo *s) +{ + int loop, channel, i, lane; + u32 address, content; + u8 dqs_lower_r0[8]; + u8 dqs_upper_r0[8]; + u8 dqs_lower_r2[8]; + u8 dqs_upper_r2[8]; + u8 dqs_center[8]; + u16 saved_dqs[2][8] = { }; + + struct rt_dqs_setting dqs_setting[8]; + + for (loop = 0; loop < RT_LOOPS; loop++) { + FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { + printk(BIOS_DEBUG, "Doing DQS write training on CH%d\n", channel); + address = 0x20000000 * channel; + for (i = 0; !RANK_IS_POPULATED(s->dimms, channel, i) + && i < RANKS_PER_CHANNEL; i++) + address += 128 * MiB; + + /* Write pattern to strobe address*/ + for (i = 0; i < PATTERN_SIZE; i++) { + content = write_training_schedule[i]; + write32((u32 *)address + 8 * i, content); + write32((u32 *)address + 8 * i + 4, content); + } + + memset(dqs_lower_r0, 0, sizeof(dqs_lower_r0)); + memset(dqs_lower_r2, 0, sizeof(dqs_lower_r2)); + memset(dqs_setting, 0, sizeof(dqs_setting)); + + if (rt_find_dqs_limit(s, channel, address, dqs_setting, dqs_lower_r0, + SUCCEEDING)) { + printk(BIOS_CRIT, "Could not find working lower limit DQS setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Lower Limit for DQS on rank 0:\n"); + for (lane = 0; lane < 8; lane++) { + 
printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dqs_lower_r0[lane]); + } + + memcpy(dqs_upper_r0, dqs_lower_r0, sizeof(dqs_upper_r0)); + if (rt_find_dqs_limit(s, channel, address, dqs_setting, dqs_upper_r0, + FAILING)) { + printk(BIOS_CRIT, "Could not find failing upper limit DQ setting\n"); + return -1; + } + + printk(BIOS_DEBUG, "Upper Limit for DQS on rank 0:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dqs_upper_r0[lane]); + } + + if (RANK_IS_POPULATED(s->dimms, channel, 0) + && RANK_IS_POPULATED(s->dimms, channel, 2)) { + address += 256 * MiB; + + memset(dqs_setting, 0 , sizeof(dqs_setting)); + + if (rt_find_dqs_limit(s, channel, address, dqs_setting, dqs_lower_r2, + SUCCEEDING)) { + printk(BIOS_CRIT, "Could not find working lower limit DQS setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Lower Limit for DQS on rank 2:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dqs_lower_r2[lane]); + } + + memcpy(dqs_upper_r2, dqs_lower_r2, sizeof(dqs_upper_r2)); + if (rt_find_dqs_limit(s, channel, address, dqs_setting, dqs_upper_r2, + FAILING)) { + printk(BIOS_CRIT, "Could not find failing upper limit DQS setting\n"); + return -1; + } + printk(BIOS_DEBUG, "Upper Limit for DQS on rank 2:\n"); + for (lane = 0; lane < 8; lane++) { + printk(BIOS_DEBUG, " ch%d, lane %d, #steps %d\n", + channel, lane, dqs_upper_r2[lane]); + } + + for (lane = 0; lane < 8; lane++) { + dqs_upper_r0[lane] = MIN(dqs_upper_r0[lane], dqs_upper_r2[lane]); + dqs_lower_r0[lane] = MAX(dqs_lower_r0[lane], dqs_lower_r2[lane]); + } + } /* end If Rank 2 is present */ + + for (lane = 0; lane < 8; lane++) { + dqs_center[lane] = (dqs_upper_r0[lane] + dqs_lower_r0[lane]) / 2; + saved_dqs[channel][lane] += dqs_center[lane]; + } + } /* END FOR_EACH_POPULATED_CHANNEL */ + } /* end RT_LOOPS */ + + FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { + for (lane = 0; lane < 
8; lane++) { + saved_dqs[channel][lane] /= RT_LOOPS; + while (saved_dqs[channel][lane]--) /* check for overflow ? */ + rt_increment_dqs(&s->rt_dqs_setting[channel][lane]); + rt_set_dqs(channel, lane, &s->rt_dqs_setting[channel][lane]); + } + } + return 0; +} diff --git a/src/northbridge/intel/x4x/raminit_ddr23.c b/src/northbridge/intel/x4x/raminit_ddr23.c index 9e7583c..ded519e 100644 --- a/src/northbridge/intel/x4x/raminit_ddr23.c +++ b/src/northbridge/intel/x4x/raminit_ddr23.c @@ -2363,9 +2363,14 @@
// XXX tRD
- // XXX Write training + // Write training + if (do_write_training(s) && (s->selected_timings.mem_clk > MEM_CLOCK_667MHz)) + die("Write training Failed!\n");
// XXX Read training + if (do_read_training(s) && (s->selected_timings.mem_clk > MEM_CLOCK_667MHz)) + die("Read training Failed!\n"); +
// DRADRB dradrb_ddr2(s); diff --git a/src/northbridge/intel/x4x/x4x.h b/src/northbridge/intel/x4x/x4x.h index c2eea38..188654b 100644 --- a/src/northbridge/intel/x4x/x4x.h +++ b/src/northbridge/intel/x4x/x4x.h @@ -250,6 +250,11 @@ u8 coarse; };
+struct rt_dqs_setting { + u8 tap; + u8 pi; +}; + struct timings { unsigned int CAS; unsigned int tclk; @@ -292,6 +297,9 @@ struct dimminfo dimms[4]; u8 spd_map[4]; u8 nmode; + struct dll_setting dqs_settings[2][8]; + struct dll_setting dq_settings[2][8]; + struct rt_dqs_setting rt_dqs_setting[2][8]; }; #define BOOT_PATH_NORMAL 0 #define BOOT_PATH_WARM_RESET 1 @@ -357,8 +365,12 @@ int ddr3_save_dimminfo(u8 dimm_idx, u8 *raw_spd, struct abs_timings *saved_timings, struct sysinfo *s); void send_jedec_cmd(const struct sysinfo *s, u8 r, u8 ch, u8 cmd, u32 val); +void search_write_leveling(struct sysinfo *s); void dqsset(u8 ch, u8 lane, const struct dll_setting *setting); void dqset(u8 ch, u8 lane, const struct dll_setting *setting); +void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose); +int do_write_training(struct sysinfo *s); +int do_read_training(struct sysinfo *s);
/* Look up tables Tables */ extern const struct dll_setting ddr2_dll_setting_667[23];