Arthur Heymans has uploaded this change for review. ( https://review.coreboot.org/22995 )
Change subject: nb/intel/x4x: Implement write leveling ......................................................................
nb/intel/x4x: Implement write leveling
DDR3 adopted a fly-by topology, which allows for better signal integrity but at the same time requires additional calibration. This is done by setting the targeted rank in write leveling mode while disabling the output buffer on the other ranks.
Change-Id: I695969868b4534f87dd1f37244fdfac891a417f0 Signed-off-by: Arthur Heymans arthur@aheymans.xyz --- M src/northbridge/intel/x4x/dq_dqs.c M src/northbridge/intel/x4x/raminit_ddr23.c M src/northbridge/intel/x4x/x4x.h 3 files changed, 401 insertions(+), 3 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/95/22995/1
diff --git a/src/northbridge/intel/x4x/dq_dqs.c b/src/northbridge/intel/x4x/dq_dqs.c index 97bfa42..12e2ab3 100644 --- a/src/northbridge/intel/x4x/dq_dqs.c +++ b/src/northbridge/intel/x4x/dq_dqs.c @@ -16,6 +16,7 @@
#include <arch/io.h> #include <console/console.h> +#include <delay.h> #include <stdint.h> #include <string.h> #include <types.h> @@ -106,6 +107,34 @@ return CB_SUCCESS; }
+/*
+ * Move a DQ/DQS DLL setting one step down and program it with set_db().
+ *
+ * The step is taken from the finest field that is still non-zero:
+ * pi first, then tap (pi restarts at 6), then clk_delay (tap restarts at
+ * the max_tap entry for the selected memory clock), then coarse (clk_delay
+ * is stepped up by one as coarse steps down).
+ *
+ * Returns CB_SUCCESS after programming the new setting, or CB_ERR, leaving
+ * the setting untouched and unprogrammed, when all four fields are zero.
+ */
+static int decrement_dq_dqs(const struct sysinfo *s,
+			struct dll_setting *dq_dqs_setting)
+{
+	const u8 tap_limit =
+		max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
+	struct dll_setting *const dll = dq_dqs_setting;
+
+	/* Nothing left to borrow from: already at the minimum. */
+	if (!(dll->pi > 0 || dll->tap > 0 || dll->clk_delay > 0
+	      || dll->coarse > 0))
+		return CB_ERR;
+
+	if (dll->pi > 0) {
+		dll->pi--;
+	} else {
+		/* Borrow from the next coarser field; pi wraps to its top. */
+		dll->pi = 6;
+		if (dll->tap > 0) {
+			dll->tap--;
+		} else {
+			dll->tap = tap_limit;
+			if (dll->clk_delay > 0) {
+				dll->clk_delay--;
+			} else {
+				/* Guard above guarantees coarse > 0 here. */
+				dll->clk_delay++;
+				dll->coarse--;
+			}
+		}
+	}
+
+	set_db(s, dll);
+	return CB_SUCCESS;
+}
+
+
 #define WT_PATTERN_SIZE 80
static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
@@ -476,3 +505,337 @@
 	printk(BIOS_DEBUG, "Done DQS read training\n");
 	return CB_SUCCESS;
 }
+
+/*
+ * Send an EMRS1 command to rank `config_rank` of `channel` to enter or leave
+ * write leveling mode.
+ *
+ * With wl_enable set, the ODT value is looked up in emrs1_lut by channel
+ * configuration (`config`), the rank being leveled (`target_rank`) and
+ * `config_rank`; the target rank additionally gets bit 7 set ("Enabling WL")
+ * and every other rank gets bit 12 set ("Disabling output").  With wl_enable
+ * clear, the ODT value comes from ddr3_emrs1_config for the channel's DIMM
+ * configuration and neither of those bits is set.
+ *
+ * NOTE(review): emrs1_lut contains 0x91, which has no case in the logging
+ * switch below, so those entries print "ODT value Undefined!" - confirm
+ * whether that is intended.
+ */
+static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
+				u8 config_rank, u8 target_rank, int wl_enable)
+{
+	u32 emrs1;
+
+	/* Is shifted by bits 2 later so u8 can be used to reduce size */
+	const static u8 emrs1_lut[8][4][4]={ /* [Config][Leveling Rank][Rank] */
+		{  /* Config 0: 2R2R */
+			{0x11, 0x00, 0x91, 0x00},
+			{0x00, 0x11, 0x91, 0x00},
+			{0x91, 0x00, 0x11, 0x00},
+			{0x91, 0x00, 0x00, 0x11}
+		},
+		{  // Config 1: 2R1R
+			{0x11, 0x00, 0x91, 0x00},
+			{0x00, 0x11, 0x91, 0x00},
+			{0x91, 0x00, 0x11, 0x00},
+			{0x00, 0x00, 0x00, 0x00}
+		},
+		{  // Config 2: 1R2R
+			{0x11, 0x00, 0x91, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x91, 0x00, 0x11, 0x00},
+			{0x91, 0x00, 0x00, 0x11}
+		},
+		{  // Config 3: 1R1R
+			{0x11, 0x00, 0x91, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x91, 0x00, 0x11, 0x00},
+			{0x00, 0x00, 0x00, 0x00}
+		},
+		{  // Config 4: 2R0R
+			{0x11, 0x00, 0x00, 0x00},
+			{0x00, 0x11, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00}
+		},
+		{  // Config 5: 0R2R
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x11, 0x00},
+			{0x00, 0x00, 0x00, 0x11}
+		},
+		{  // Config 6: 1R0R
+			{0x11, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00}
+		},
+		{  // Config 7: 0R1R
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x00, 0x00},
+			{0x00, 0x00, 0x11, 0x00},
+			{0x00, 0x00, 0x00, 0x00}
+		}
+	};
+
+	if (wl_enable) {
+		printk(RAM_DEBUG, "Entering WL mode\n");
+		printk(RAM_DEBUG, "Using WL ODT values\n");
+		emrs1 = emrs1_lut[config][target_rank][config_rank];
+	} else {
+		printk(RAM_DEBUG, "Exiting WL mode\n");
+		emrs1 = ddr3_emrs1_config[s->dimm_config[channel]][config_rank];
+	}
+	printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
+	switch (emrs1) { /* logging only: emrs1 is not modified in here */
+	case 0:
+		printk(RAM_DEBUG, "0 Ohm\n");
+		break;
+	case 0x11:
+		printk(RAM_DEBUG, "40 Ohm\n");
+		break;
+	case 0x81:
+		printk(RAM_DEBUG, "30 Ohm\n");
+		break;
+	case 0x80:
+		printk(RAM_DEBUG, "20 Ohm\n");
+		break;
+	case 0x10:
+		printk(RAM_DEBUG, "120 Ohm\n");
+		break;
+	case 0x01:
+		printk(RAM_DEBUG, "60 Ohm\n");
+		break;
+	default:
+		printk(BIOS_WARNING, "ODT value Undefined!\n");
+		break;
+	}
+
+	emrs1 <<= 2; /* the tables store the value pre-shift (see emrs1_lut) */
+	emrs1 |= (1 << 1); /* always set; presumably drive strength - TODO confirm */
+
+	if (wl_enable && (target_rank != config_rank)) {
+		printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
+		emrs1 |= (1 << 12);
+	}
+	if (wl_enable && (target_rank == config_rank)) {
+		printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
+		emrs1 |= (1 << 7);
+	}
+	send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
+}
+
+#define N_SAMPLES 5 /* number of samples sample_dq() takes per call */
+/*
+ * Count, per bytelane, how many of N_SAMPLES samples read back high.
+ *
+ * high_found[] is zeroed for TOTAL_BYTELANES entries first (the prototype
+ * declares 8 entries; presumably TOTAL_BYTELANES is 8 - TODO confirm).  Each
+ * sample issues two 32-bit writes at the test address of channel/rank, waits
+ * 5 us and then adds bit 7 of MCHBAR8(0x561 + 0x400 * channel + lane * 4) to
+ * the lane's counter.  `s` is not used.
+ */
+static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
+		u8 high_found[8]) {
+	u32 address = test_address(channel, rank);
+	int samples, lane;
+
+	memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
+	for (samples = 0; samples < N_SAMPLES; samples++) {
+		write32((u32 *)address, 0x12341234);
+		write32((u32 *)address + 4, 0x12341234); /* u32 pointer math: 4 elements further */
+		udelay(5);
+		FOR_EACH_BYTELANE(lane) {
+			u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel
+					+ (lane * 4)) >> 7) & 1;
+			high_found[lane] += dq_high;
+		}
+	}
+}
+/*
+ * Search, per bytelane, for the DQS setting at which the sampled DQ value
+ * changes from low to high on the given channel/rank.
+ *
+ * Works on a local copy of s->dqs_settings[channel].  Pass 0 steps each
+ * lane's setting down until none of its samples read high; pass 1 then steps
+ * it up until all N_SAMPLES samples read high.  The results are stored back
+ * into s->dqs_settings[channel] only when both passes finish.
+ *
+ * Returns CB_SUCCESS, or CB_ERR if a lane cannot be stepped further in the
+ * required direction.  Both loops end when bytelane_ok == 0xff, i.e. they
+ * assume exactly 8 bytelanes - TODO confirm against TOTAL_BYTELANES.
+ */
+static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
+{
+	int lane;
+	u8 saved_24d;
+	struct dll_setting dqs_setting[TOTAL_BYTELANES];
+	u8 bytelane_ok = 0;
+	u8 dq_sample[TOTAL_BYTELANES];
+
+	memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
+	FOR_EACH_BYTELANE(lane)
+		dqsset(channel, lane, &dqs_setting[lane]);
+
+	saved_24d = MCHBAR8(0x24d + 0x400 * channel); /* written back only before the CB_SUCCESS return */
+
+	/*
+	 * Loop 0: Find DQ sample low, by decreasing.
+	 * A lane is marked done in bytelane_ok once its sample count is 0.
+	 */
+	while (bytelane_ok != 0xff) {
+		sample_dq(s, channel, rank, dq_sample);
+		FOR_EACH_BYTELANE(lane) {
+			if (bytelane_ok & (1 << lane))
+				continue;
+
+			printk(RAM_SPEW, "%d, %d, %02d, %d,"
+				" lane%d sample: %d\n",
+				dqs_setting[lane].coarse,
+				dqs_setting[lane].clk_delay,
+				dqs_setting[lane].tap,
+				dqs_setting[lane].pi,
+				lane,
+				dq_sample[lane]);
+
+			if (dq_sample[lane] > 0) {
+				if (decrement_dq_dqs(s, &dqs_setting[lane])) {
+					printk(BIOS_EMERG,
+						"DQS setting channel%d, "
+						"lane %d reached a minimum!\n",
+						channel, lane);
+					return CB_ERR;
+				}
+			} else {
+				bytelane_ok |= (1 << lane);
+			}
+			dqsset(channel, lane, &dqs_setting[lane]);
+		}
+	}
+
+	printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
+	FOR_EACH_BYTELANE(lane) {
+		printk(RAM_DEBUG, "lane %d: ", lane);
+		print_dll_setting(&dqs_setting[lane], 0);
+	}
+
+	/*
+	 * Loop 1: Find DQ sample high, by increasing.
+	 * A lane is marked done once all N_SAMPLES samples read high.
+	 */
+	bytelane_ok = 0;
+	while (bytelane_ok != 0xff) {
+		sample_dq(s, channel, rank, dq_sample);
+		FOR_EACH_BYTELANE(lane) {
+			if (bytelane_ok & (1 << lane))
+				continue;
+
+			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
+				dqs_setting[lane].coarse,
+				dqs_setting[lane].clk_delay,
+				dqs_setting[lane].tap,
+				dqs_setting[lane].pi,
+				lane,
+				dq_sample[lane]);
+
+
+			if (dq_sample[lane] == N_SAMPLES) {
+				bytelane_ok |= (1 << lane);
+			} else {
+				if (increment_dq_dqs(s, &dqs_setting[lane])) {
+					printk(BIOS_EMERG,
+						"DQS setting channel%d, "
+						"lane %d reached a maximum!\n",
+						channel, lane);
+					return CB_ERR;
+
+				}
+			}
+			dqsset(channel, lane, &dqs_setting[lane]);
+		}
+	}
+
+	printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
+	FOR_EACH_BYTELANE(lane) {
+		printk(RAM_DEBUG, "lane %d: ", lane);
+		print_dll_setting(&dqs_setting[lane], 0);
+	}
+
+	printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
+	FOR_EACH_BYTELANE(lane) {
+		printk(BIOS_DEBUG, "\tlane%d: ", lane);
+		print_dll_setting(&dqs_setting[lane], 1);
+		s->dqs_settings[channel][lane] = dqs_setting[lane];
+	}
+
+	MCHBAR8(0x24d + 0x400 * channel) = saved_24d;
+	return CB_SUCCESS;
+}
+/*
+ * Run DDR3 write leveling and derive initial DQ settings from the result.
+ *
+ * For every populated channel, all populated ranks are sent the write
+ * leveling EMRS1 setup with the first populated rank as the target,
+ * increment_to_dqs_edge() trains the DQS settings against that rank (die()
+ * on failure), and the ranks are then sent the non-leveling EMRS1 value
+ * again.  Only the first populated rank of a channel is leveled, because the
+ * rank loop breaks after one iteration.
+ *
+ * Afterwards each populated lane's DQ setting is programmed with dqset() as
+ * the trained DQS setting advanced by a fixed, memory-clock dependent number
+ * of steps.  That DQ value lives in a local copy and is not written back to
+ * *s by this function; the return value of increment_dq_dqs() is ignored.
+ */
+void search_write_leveling(struct sysinfo *s)
+{
+	int i, ch, count;
+	u8 config, rank0, rank1, lane;
+	struct dll_setting dq_setting;
+
+	u8 chanconfig_lut[16]={0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3}; /* indexed by s->dimm_config[ch] */
+
+	u8 odt_force[8][4] = { /* [Config][leveling rank] */
+		{0x5, 0x6, 0x5, 0x9},
+		{0x5, 0x6, 0x5, 0x0},
+		{0x5, 0x0, 0x5, 0x9},
+		{0x5, 0x0, 0x5, 0x0},
+		{0x1, 0x2, 0x0, 0x0},
+		{0x0, 0x0, 0x4, 0x8},
+		{0x1, 0x0, 0x0, 0x0},
+		{0x0, 0x0, 0x4, 0x0}
+	};
+
+	printk(BIOS_DEBUG, "Starting write levelling.\n");
+
+	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
+		printk(BIOS_DEBUG, "\tCH%d\n", ch);
+		config = chanconfig_lut[s->dimm_config[ch]];
+
+		MCHBAR8(0x5d8 + 0x400 * ch) =
+			MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e;
+		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
+			~0x3fff) | 0x3fff;
+		MCHBAR8(0x265 + 0x400 * ch) =
+			MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
+		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) {
+			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1) {
+				set_rank_write_level(s, ch, config, rank1, rank0, 1);
+			}
+
+			MCHBAR8(0x298 + 2 + 0x400 * ch) =
+				(MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f)
+				| odt_force[config][rank0];
+			MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
+				| 0x4e;
+			MCHBAR8(0x5d9 + 0x400 * ch) =
+				(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04;
+			MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff)
+				| 0x00014000;
+
+			if (increment_to_dqs_edge(s, ch, rank0))
+				die("Write Leveling failed!");
+
+			MCHBAR8(0x298 + 2 + 0x400 * ch) =
+				MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f;
+			MCHBAR8(0x271 + 0x400 * ch) =
+				(MCHBAR8(0x271 + 0x400 * ch) & ~0x7e)
+				| 0x0e;
+			MCHBAR8(0x5d9 + 0x400 * ch) =
+				(MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04);
+			MCHBAR32(0x1a0) = (MCHBAR32(0x1a0)
+				& ~0x07ffffff) | 0x00555801;
+			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch,
+					rank1) {
+				if (rank0 == rank1) {
+					set_rank_write_level(s, ch, config,
+						rank1, rank0, 0);
+					send_jedec_cmd(s, rank1, ch,
+						NORMALOP_CMD, 1 << 12);
+				}
+			}
+			break; /* Break on first populated rank */
+		}
+		MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e) | 0x0e;
+		MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) &
+			~0x3fff) | 0x1807;
+		MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f;
+		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) {
+			set_rank_write_level(s, ch, config, rank0, rank0, 0);
+		}
+
+	}
+	MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80;
+
+	/*
+	 * Increment DQ dll setting by a standard amount past DQS,
+	 * This is further trained in write training.
+	 */
+	switch (s->selected_timings.mem_clk) {
+	default:
+	case MEM_CLOCK_800MHz:
+		count = 39;
+		break;
+	case MEM_CLOCK_1066MHz:
+		count = 32;
+		break;
+	case MEM_CLOCK_1333MHz:
+		count = 42;
+		break;
+	}
+
+	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
+		dq_setting = s->dqs_settings[ch][lane];
+		for (i = 0; i < count; i++)
+			increment_dq_dqs(s, &dq_setting);
+		dqset(ch, lane, &dq_setting);
+	}
+
+	printk(BIOS_DEBUG, "Done write levelling.\n");
+}
diff --git a/src/northbridge/intel/x4x/raminit_ddr23.c b/src/northbridge/intel/x4x/raminit_ddr23.c
index 2835890..13b98ee 100644
--- a/src/northbridge/intel/x4x/raminit_ddr23.c
+++ b/src/northbridge/intel/x4x/raminit_ddr23.c
@@ -1360,8 +1360,7 @@
 	return channel * 512 * MiB + rank * 128 * MiB;
 }
-static void send_jedec_cmd(const struct sysinfo *s, u8 r, - u8 ch, u8 cmd, u32 val) +void send_jedec_cmd(const struct sysinfo *s, u8 r, u8 ch, u8 cmd, u32 val) { u32 addr = test_address(ch, r); volatile u32 rubbish; @@ -1926,6 +1925,23 @@ MCHBAR8(0x561 + (lane << 2)) = MCHBAR8(0x561 + (lane << 2)) & ~(1 << 3); }
+/*
+ * Software-driven DDR3 reset: toggles bit 1 of MCHBAR 0x1a8 and bits 7 and
+ * 1:0 of MCHBAR 0x5da in a fixed order with 200 us and 500 us delays, then
+ * calls jedec_ddr3(s).  Every register access of the sequence is kept,
+ * including the repeated clear of 0x1a8 bit 1.
+ */
+static void software_ddr3_reset(struct sysinfo *s)
+{
+	u8 reg8;
+
+	printk(BIOS_DEBUG, "Software initiated DDR3 reset.\n");
+
+	MCHBAR8(0x1a8) |= 0x02;
+	MCHBAR8(0x5da) &= ~0x80;
+	MCHBAR8(0x1a8) &= ~0x02;
+
+	/* Replace bits 1:0 of 0x5da with 01b in a single read-modify-write. */
+	reg8 = MCHBAR8(0x5da);
+	reg8 &= ~0x03;
+	reg8 |= 1;
+	MCHBAR8(0x5da) = reg8;
+	udelay(200);
+
+	MCHBAR8(0x1a8) &= ~0x02;
+	/* Pulse bit 7 of 0x5da: set, then clear. */
+	MCHBAR8(0x5da) |= 0x80;
+	MCHBAR8(0x5da) &= ~0x80;
+	udelay(500);
+
+	/* Pulse bits 1:0 of 0x5da: set, then clear. */
+	MCHBAR8(0x5da) |= 0x03;
+	MCHBAR8(0x5da) &= ~0x03;
+
+	jedec_ddr3(s);
+}
+
 void do_raminit(struct sysinfo *s, int fast_boot)
 {
 	u8 ch;
@@ -2012,6 +2028,17 @@
 		MCHBAR8(0x9d8) = MCHBAR8(0x9d8) | 0x7;
 	}
+ /* DDR3 reset */ + if ((s->spd_type == DDR3) && (s->boot_path != BOOT_PATH_RESUME)) { + printk(BIOS_DEBUG, "DDR3 Reset.\n"); + MCHBAR8(0x1a8) = MCHBAR8(0x1a8) & ~0x2; + MCHBAR8(0x5da) = MCHBAR8(0x5da) | 0x80; + udelay(500); + MCHBAR8(0x1a8) = MCHBAR8(0x1a8) & ~0x2; + MCHBAR8(0x5da) = MCHBAR8(0x5da) & ~0x80; + udelay(500); + } + // Pre jedec MCHBAR8(0x40) = MCHBAR8(0x40) | 0x2; FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) { @@ -2031,6 +2058,13 @@
printk(BIOS_DEBUG, "Done jedec steps\n");
+ if (s->spd_type == DDR3) { + if (!fast_boot) + search_write_leveling(s); + if (s->boot_path == BOOT_PATH_NORMAL) + software_ddr3_reset(s); + } + // After JEDEC reset MCHBAR8(0x40) = MCHBAR8(0x40) & ~0x2; FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) { diff --git a/src/northbridge/intel/x4x/x4x.h b/src/northbridge/intel/x4x/x4x.h index 37215b1..a781a80 100644 --- a/src/northbridge/intel/x4x/x4x.h +++ b/src/northbridge/intel/x4x/x4x.h @@ -396,7 +396,8 @@ struct abs_timings *saved_timings); int ddr3_save_dimminfo(u8 dimm_idx, u8 *raw_spd, struct abs_timings *saved_timings, struct sysinfo *s); - +void search_write_leveling(struct sysinfo *s); +void send_jedec_cmd(const struct sysinfo *s, u8 r, u8 ch, u8 cmd, u32 val);
extern const struct dll_setting default_ddr2_667_ctrl[7]; extern const struct dll_setting default_ddr2_800_ctrl[7];