Tristan Hsieh has uploaded this change for review. ( https://review.coreboot.org/28842
Change subject: mediatek/mt8183: Add DDR driver of tx rx window perbit cal part ......................................................................
mediatek/mt8183: Add DDR driver of tx rx window perbit cal part
BUG=b:80501386 BRANCH=none TEST=Boots correctly on Kukui, and inits DRAM successfully with related patches.
Change-Id: I4434897864993e254e1362416316470083351493 Signed-off-by: Huayang Duan <huayang.duan@mediatek.com> --- M src/soc/mediatek/mt8183/dramc_pi_calibration_api.c M src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h M src/soc/mediatek/mt8183/include/soc/memlayout.ld 3 files changed, 844 insertions(+), 1 deletion(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/42/28842/1
diff --git a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c index 159d72a..f6ab9a0 100644 --- a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c +++ b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c @@ -20,6 +20,81 @@ #include <soc/dramc_register.h> #include <soc/dramc_pi_api.h>
/*
 * NOTE(review): raw Gerrit diff content follows; the '+' markers are part of
 * the patch, not the code.  This span adds the RX/TX vref scan ranges and
 * delay-tap limit constants, the per-bit / per-byte delay bookkeeping structs
 * used by the window calibration, and an extern declaration of the per-board
 * DQ pin remapping table.
 * NOTE(review): `extern const u8 phy_mapping[CHANNEL_MAX][16];` declared in
 * the .c file works, but the declaration belongs in a shared header so the
 * definition and all users agree on the type — consider moving it.
 * NOTE(review): `struct per_bit_dly` and the MAX_TX_DQDLY_TAPS /
 * MAX_TX_DQSDLY_TAPS / MAX_RX_DQSDLY_TAPS / *_VREF_STEP constants appear
 * unused in this patch — confirm they are needed or drop them.
 */
+enum { + RX_VREF_BEGIN = 0, + RX_VREF_END = 12, + RX_VREF_STEP = 1, + TX_VREF_BEGIN = 8, + TX_VREF_END = 18, + TX_VREF_STEP = 1, +}; + +enum { + FIRST_DQ_DELAY = 0, + FIRST_DQS_DELAY = -27, + MAX_DQDLY_TAPS = 16, + MAX_RX_DQDLY_TAPS = 63, + MAX_TX_DQDLY_TAPS = 200, + MAX_TX_DQSDLY_TAPS = 63, + MAX_RX_DQSDLY_TAPS = 26 +}; + +enum { + RX_WIN_RD_DQC = 0, + RX_WIN_TEST_ENG, + TX_WIN_DQ_ONLY, + TX_WIN_DQ_DQM, +}; + +enum { + RX_DQ = 0, + RX_DQM, + RX_DQS, +}; + +struct dqdqs_perbit_dly { + s16 first_dqdly; + s16 last_dqdly; + s16 first_dqsdly; + s16 last_dqsdly; + s16 best_first_dqdly; + s16 best_last_dqdly; + s16 best_first_dqsdly; + s16 best_last_dqsdly; + s16 best_dqdly; + s16 best_dqsdly; +}; + +struct vref_perbit_dly { + u8 vref; + u16 max_win; + u16 min_win; + struct dqdqs_perbit_dly perbit_dly[DQ_DATA_WIDTH]; +}; + +struct tx_dly_tune { + u8 fine_tune; + u8 coarse_tune_large; + u8 coarse_tune_small; + u8 coarse_tune_large_oen; + u8 coarse_tune_small_oen; +}; + +struct per_byte_dly { + u16 max_center; + u16 min_center; + u16 final_dly; +}; + +struct per_bit_dly { + s16 first_pass; + s16 last_pass; + s16 win_center; + u16 win_size; + u16 best_dqdly; +}; +extern const u8 phy_mapping[CHANNEL_MAX][16]; + static void auto_refresh_switch(u8 chn, u8 option) { clrsetbits_le32(&ch[chn].ao.refctrl0, 1 << REFCTRL0_REFDIS_SHIFT, @@ -863,6 +938,769 @@ dram_phy_reset(chn); }
/*
 * NOTE(review): this span adds the RDDQC (read-DQ-calibration) setup/run/
 * teardown helpers, the RX/TX vref programming helpers, the coarse/fine TX
 * delay decomposition (dramc_transfer_dly_tune), the RX delay-factor writer
 * (set_rx_dly_factor), and the head of set_tx_dly_factor.
 * NOTE(review): dramc_rd_dqc_run() prints "Resp fail (time out)" whenever
 * timeout_cnt reaches 0, even if the RDDQC response arrived on the final
 * poll — the message condition should be `response == 0`, not
 * `timeout_cnt == 0`.
 * NOTE(review): dramc_rd_dqc_init() casts away const from phy_mapping;
 * `const u8 *` would do.  The loop builds the MR15/MR20 golden pattern
 * remapped through the board's DQ mux (pattern 0x5555 scattered to the
 * physical bit positions).
 * NOTE(review): in dramc_transfer_dly_tune(), `tmp_val -= 4` underflows the
 * u16 when dly < 2 * TX_DQ_COARSE_TUNE_TO_FINE_TUNE_TAP; presumably the OEN
 * (output-enable) taps are meant to lead the data taps by 4 and small delays
 * never occur here — TODO confirm the minimum dly callers can pass.
 * NOTE(review): dramc_set_tx_vref() writes MR14; the `| 0 << 6` term is a
 * no-op — presumably a placeholder for the vref-range select bit; verify.
 */
+static void dramc_rd_dqc_init(u8 chn, u8 rank) +{ + u8 *lpddr_phy_mapping; + u16 temp_value = 0; + u8 mr15_golden_value = 0; + u8 mr20_golden_value = 0; + + for (u8 b = 0; b < 2; b++) + clrbits_le32(&ch[chn].phy.shu[0].b[b].dq[7], + 0x1 << SHU1_BX_DQ7_R_DMDQMDBI_SHU_SHIFT); + + clrsetbits_le32(&ch[chn].ao.mrs, + MRS_MRSRK_MASK, rank << MRS_MRSRK_SHIFT); + setbits_le32(&ch[chn].ao.mpc_option, + 0x1 << MPC_OPTION_MPCRKEN_SHIFT); + + lpddr_phy_mapping = (u8 *)phy_mapping[chn]; + for (u8 i = 0; i < 16; i++) + temp_value |= ((0x5555 >> i) & 0x1) << lpddr_phy_mapping[i]; + + mr15_golden_value = (u8) temp_value & 0xff; + mr20_golden_value = (u8) (temp_value >> 8) & 0xff; + clrsetbits_le32(&ch[chn].ao.mr_golden, + MR_GOLDEN_MR15_GOLDEN_MASK | MR_GOLDEN_MR20_GOLDEN_MASK, + mr15_golden_value << 8 | mr20_golden_value); +} + +static u32 dramc_rd_dqc_run(u8 chn) +{ + u16 timeout_cnt = 100; + u8 response; + u32 result; + + setbits_le32(&ch[chn].ao.spcmdctrl, 1 << SPCMDCTRL_RDDQCDIS_SHIFT); + setbits_le32(&ch[chn].ao.spcmd, 1 << SPCMD_RDDQCEN_SHIFT); + do { + response = read32(&ch[chn].nao.spcmdresp) & + (0x1 << SPCMDRESP_RDDQC_RESPONSE_SHIFT); + timeout_cnt--; + udelay(1); + } while ((response == 0) && (timeout_cnt > 0)); + + if (timeout_cnt == 0) + dramc_dbg("[RxWinRDDQC] Resp fail (time out)\n"); + + result = read32(&ch[chn].nao.rdqc_cmp); + clrbits_le32(&ch[chn].ao.spcmd, 1 << SPCMD_RDDQCEN_SHIFT); + clrbits_le32(&ch[chn].ao.spcmdctrl, 1 << SPCMDCTRL_RDDQCDIS_SHIFT); + + return result; +} + +static void dramc_rd_dqc_end(u8 chn) +{ + clrsetbits_le32(&ch[chn].ao.mrs, MRS_MRSRK_MASK, 0 << MRS_MRSRK_SHIFT); +} + +static void dramc_rx_vref_enable(u8 chn) +{ + setbits_le32(&ch[chn].phy.b[0].dq[5], + 0x1 << B0_DQ5_RG_RX_ARDQ_VREF_EN_B0_SHIFT); + setbits_le32(&ch[chn].phy.b[1].dq[5], + 0x1 << B1_DQ5_RG_RX_ARDQ_VREF_EN_B1_SHIFT); +} + +static void dramc_set_rx_vref(u8 chn, u8 value) +{ + for (u8 b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.shu[0].b[b].dq[5], + 
SHU1_BX_DQ5_RG_RX_ARDQ_VREF_SEL_B0_MASK, + value << SHU1_BX_DQ5_RG_RX_ARDQ_VREF_SEL_B0_SHIFT); +} + +static void dramc_set_tx_vref(u8 chn, u8 rank, u8 value) +{ + dramc_mode_reg_write_by_rank(chn, rank, 14, value | 0 << 6); +} + +static void dramc_set_vref(u8 chn, u8 rank, u8 cal_type, u8 vref) +{ + if (cal_type == RX_WIN_TEST_ENG) + dramc_set_rx_vref(chn, vref); + else + dramc_set_tx_vref(chn, rank, vref); +} + +static void dramc_transfer_dly_tune(u8 chn, u32 dly, + struct tx_dly_tune *dly_tune) +{ + u16 tmp_val; + + dly_tune->fine_tune = dly & (TX_DQ_COARSE_TUNE_TO_FINE_TUNE_TAP - 1); + + tmp_val = (dly / TX_DQ_COARSE_TUNE_TO_FINE_TUNE_TAP) << 1; + dly_tune->coarse_tune_small = tmp_val - ((tmp_val >> 3) << 3); + dly_tune->coarse_tune_large = tmp_val >> 3; + + tmp_val -= 4; + dly_tune->coarse_tune_small_oen = tmp_val - ((tmp_val >> 3) << 3); + dly_tune->coarse_tune_large_oen = tmp_val >> 3; +} + +static void set_rx_dly_factor(u8 chn, u8 rank, u8 type, u32 val) +{ + u8 i; + u32 tmp, mask; + + switch (type) { + case RX_DQ: + tmp = (val << 24 | val << 16 | val << 8 | val); + for (i = 2; i < 6; i++) { + write32(&ch[chn].phy.shu[0].rk[rank].b[0].dq[i], tmp); + write32(&ch[chn].phy.shu[0].rk[rank].b[1].dq[i], tmp); + } + break; + + case RX_DQM: + tmp = (val << 8 | val); + mask = SHU1_B0_DQ6_RK_RX_ARDQM0_F_DLY_B0_MASK | + SHU1_B0_DQ6_RK_RX_ARDQM0_R_DLY_B0_MASK; + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[0].dq[6], + mask, tmp); + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[1].dq[6], + mask, tmp); + break; + + case RX_DQS: + tmp = (val << 24 | val << 16); + mask = SHU1_B0_DQ6_RK_RX_ARDQS0_F_DLY_B0_MASK | + SHU1_B0_DQ6_RK_RX_ARDQS0_R_DLY_B0_MASK; + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[0].dq[6], + mask, tmp); + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[1].dq[6], + mask, tmp); + } +} + +static void set_tx_dly_factor(u8 chn, u8 rank, u8 type, u32 curr_val) +{ + struct tx_dly_tune dly_tune = {0}; + u32 coarse_tune_large = 0, coarse_tune_large_oen = 0; + 
/*
 * NOTE(review): continuation of set_tx_dly_factor (replicates one byte's
 * coarse-tune nibbles into all four byte lanes of the SELPH registers, plus
 * the per-byte fine-tune field), followed by dramc_get_smallest_dqs_dly,
 * dramc_get_dly_range, and the pass-window tracker dramc_check_dqdqs_win.
 * NOTE(review): dramc_get_dly_range() has no default case; if a new cal_type
 * is added, *begin/*end stay at the caller's initial values silently —
 * consider a default with an assert/log.  `pre_cal` is only consulted for
 * TX_WIN_DQ_ONLY, which relies on a previous TX_WIN_DQ_DQM run having filled
 * it (the caller keeps it static for exactly that reason).
 * NOTE(review): dramc_check_dqdqs_win() uses -1/-2 as "no first pass yet" /
 * "window still open" sentinels; flag==0 tracks the DQS (negative-delay)
 * side, flag==1 the DQ side.  A window that is still open when the sweep
 * ends is only recorded if dly_step == last_step is actually reached —
 * see the note at the caller about the positive-delay sweep.
 */
u32 coarse_tune_small = 0, coarse_tune_small_oen = 0; + + dramc_transfer_dly_tune(chn, curr_val, &dly_tune); + + for (u8 i = 0; i < 4; i++) { + coarse_tune_large += dly_tune.coarse_tune_large << (i * 4); + coarse_tune_large_oen += + dly_tune.coarse_tune_large_oen << (i * 4); + coarse_tune_small += dly_tune.coarse_tune_small << (i * 4); + coarse_tune_small_oen += + dly_tune.coarse_tune_small_oen << (i * 4); + } + if (type == TX_WIN_DQ_DQM) + dramc_dbg("%3d |%d %d %2d | [0]", curr_val, + dly_tune.coarse_tune_large, + dly_tune.coarse_tune_small, dly_tune.fine_tune); + + if (type != TX_WIN_DQ_DQM && type != TX_WIN_DQ_ONLY) + return; + + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[0], + (coarse_tune_large_oen << 16) | coarse_tune_large); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[2], + (coarse_tune_small_oen << 16) | coarse_tune_small); + for (u8 b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[b].dq[7], + FINE_TUNE_DQ_MASK, dly_tune.fine_tune << 8); + + if (type == TX_WIN_DQ_DQM) { + /* large coarse_tune setting */ + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[1], + (coarse_tune_large_oen << 16) | coarse_tune_large); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[3], + (coarse_tune_small_oen << 16) | coarse_tune_small); + /* fine_tune delay setting */ + for (u8 b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[b].dq[7], + FINE_TUNE_DQM_MASK, dly_tune.fine_tune << 16); + + } +} + +static u32 dramc_get_smallest_dqs_dly(u8 chn, u8 rank, + const struct sdram_params *params) +{ + u32 min_dly = 0xffff; + + for (size_t i = 0; i < DQS_NUMBER; i++) + min_dly = MIN(min_dly, params->wr_level[chn][rank][i]); + + return DQS_DELAY + min_dly; +} + + +static void dramc_get_dly_range(u8 chn, u8 rank, u8 cal_type, u16 *pre_cal, + s16 *begin, s16 *end, const struct sdram_params *params) +{ + u16 pre_dq_dly; + switch (cal_type) { + case RX_WIN_RD_DQC: + case RX_WIN_TEST_ENG: + *begin = FIRST_DQS_DELAY; + *end = MAX_RX_DQDLY_TAPS; + break; + + 
case TX_WIN_DQ_DQM: + *begin = dramc_get_smallest_dqs_dly(chn, rank, params); + *end = *begin + 256; + break; + + case TX_WIN_DQ_ONLY: + pre_dq_dly = MIN(pre_cal[0], pre_cal[1]); + pre_dq_dly = (pre_dq_dly > 24) ? (pre_dq_dly - 24) : 0; + *begin = pre_dq_dly; + *end = *begin + 64; + break; + } +} + +static void dramc_check_dqdqs_win(struct dqdqs_perbit_dly *p, + u16 dly_step, u16 last_step, u32 fail_bit, u8 flag) +{ + s16 pass_win, best_pass_win; + s16 *first_dly_pass, *last_dly_pass; + s16 *best_first_pass, *best_last_pass; + + if (flag == 0) { + first_dly_pass = &(p->first_dqsdly); + last_dly_pass = &(p->last_dqsdly); + best_first_pass = &(p->best_first_dqsdly); + best_last_pass = &(p->best_last_dqsdly); + } else { + first_dly_pass = &(p->first_dqdly); + last_dly_pass = &(p->last_dqdly); + best_first_pass = &(p->best_first_dqdly); + best_last_pass = &(p->best_last_dqdly); + } + + if (fail_bit == 0) { + if (*first_dly_pass == -1) { + /* first pass delay tap */ + *first_dly_pass = dly_step; + } + if ((*last_dly_pass == -2) && (dly_step == last_step)) { + /* pass to the last tap */ + *last_dly_pass = dly_step; + pass_win = *last_dly_pass - *first_dly_pass; + best_pass_win = *best_last_pass - *best_first_pass; + if (pass_win > best_pass_win) { + *best_last_pass = *last_dly_pass; + *best_first_pass = *first_dly_pass; + } + /* clear to find the next pass range if it has */ + *first_dly_pass = -1; + *last_dly_pass = -2; + } + } else { + if ((*first_dly_pass != -1) + && (*last_dly_pass == -2)) { + *last_dly_pass = dly_step - 1; + pass_win = *last_dly_pass - *first_dly_pass; + best_pass_win = *best_last_pass - *best_first_pass; + if (pass_win > best_pass_win) { + *best_last_pass = *last_dly_pass; + *best_first_pass = *first_dly_pass; + } + /* clear to find the next pass range if it has */ + *first_dly_pass = -1; + *last_dly_pass = -2; + } + } +} + +static void dramc_set_vref_dly(struct vref_perbit_dly *vref_dly, + u8 vref, u32 win_size_sum, struct dqdqs_perbit_dly 
/*
 * NOTE(review): tail of dramc_set_vref_dly (snapshots the per-bit best
 * windows for the current vref), then the vref-selection logic
 * dramk_calc_best_vref, the TX per-byte window-center reducer
 * dramc_calc_tx_perbyte_dly, the RX setup/hold delay applier
 * dramc_set_rx_dly, and the head of set_tx_best_dly_factor.
 * NOTE(review): dramk_calc_best_vref() compares cal_type against
 * TX_DQ_DQS_MOVE_DQ_ONLY, but every caller passes values from the
 * RX_WIN_*/TX_WIN_* enum defined in this file — unless the two constants
 * happen to be equal, the TX branch is unreachable and TX falls into the
 * unconditional `else`.  Presumably this should be TX_WIN_DQ_ONLY; verify
 * the enum values.
 * NOTE(review): `static u32 min_win_size_vref` persists across channels and
 * ranks; it is never reset, so a later rank's vref search is constrained by
 * an earlier one's minimum window — confirm this is intended.
 * NOTE(review): dramc_calc_tx_perbyte_dly() compares s16 win_center against
 * u16 min_center/max_center — the usual arithmetic conversions make this an
 * unsigned compare; safe only if win_center is never negative (TODO confirm).
 * NOTE(review): function name prefix "dramk_" is inconsistent with the
 * file's "dramc_" convention — likely a typo.
 */
delay[]) +{ + struct dqdqs_perbit_dly *perbit_dly = vref_dly->perbit_dly; + + vref_dly->max_win = win_size_sum; + vref_dly->vref = vref; + for (u8 bit = 0; bit < DQ_DATA_WIDTH; bit++) { + perbit_dly[bit].best_dqdly = delay[bit].best_dqdly; + perbit_dly[bit].best_first_dqdly = delay[bit].best_first_dqdly; + perbit_dly[bit].best_last_dqdly = delay[bit].best_last_dqdly; + perbit_dly[bit].best_first_dqsdly = + delay[bit].best_first_dqsdly; + perbit_dly[bit].best_last_dqsdly = delay[bit].best_last_dqsdly; + } +} + +static int dramk_calc_best_vref(u8 cal_type, u8 vref, + struct vref_perbit_dly *vref_dly, + struct dqdqs_perbit_dly delay[]) +{ + u8 bit; + u32 win_size; + u32 win_size_sum = 0; + static u32 min_win_size_vref; + + if (cal_type == RX_WIN_TEST_ENG) { + for (bit = 0; bit < DQ_DATA_WIDTH; bit++) { + win_size_sum += (delay[bit].best_last_dqdly - + delay[bit].best_first_dqdly + 1); + win_size_sum += (delay[bit].best_last_dqsdly - + delay[bit].best_first_dqsdly + 1); + } + + if (win_size_sum > vref_dly->max_win) + dramc_set_vref_dly(vref_dly, vref, win_size_sum, delay); + + if (win_size_sum < (vref_dly->max_win * 95 / 100)) + return 1; + } else if (cal_type == TX_DQ_DQS_MOVE_DQ_ONLY) { + for (bit = 0; bit < DQ_DATA_WIDTH; bit++) { + win_size = (delay[bit].best_last_dqdly - + delay[bit].best_first_dqdly + 1); + vref_dly->min_win = MIN(vref_dly->min_win, win_size); + win_size_sum += win_size; + } + + if ((win_size_sum > vref_dly->max_win) + && (vref_dly->min_win >= min_win_size_vref)) { + min_win_size_vref = vref_dly->min_win; + dramc_set_vref_dly(vref_dly, vref, win_size_sum, delay); + } + } else + dramc_set_vref_dly(vref_dly, vref, win_size_sum, delay); + + return 0; +} + +static void dramc_calc_tx_perbyte_dly( + struct dqdqs_perbit_dly *p, s16 *win, + struct per_byte_dly *byte_delay_prop) +{ + s16 win_center; + win_center = (p->best_first_dqdly + p->best_last_dqdly) >> 1; + *win = win_center; + + if (win_center < byte_delay_prop->min_center) + 
byte_delay_prop->min_center = win_center; + if (win_center > byte_delay_prop->max_center) + byte_delay_prop->max_center = win_center; +} + +static void dramc_set_rx_dly(u8 chn, u8 rank, s32 dly) +{ + if (dly <= 0) { + /* Hold time calibration */ + set_rx_dly_factor(chn, rank, RX_DQS, -dly); + dram_phy_reset(chn); + } else { + /* Setup time calibration */ + set_rx_dly_factor(chn, rank, RX_DQS, 0); + set_rx_dly_factor(chn, rank, RX_DQM, dly); + dram_phy_reset(chn); + set_rx_dly_factor(chn, rank, RX_DQ, dly); + } + +} + +static void set_tx_best_dly_factor(u8 chn, u8 rank_start, + struct per_byte_dly *tx_perbyte_dly, u16 dq_precal_result[]) +{ + u8 i, rank; + u32 coarse_tune_large = 0; + u32 coarse_tune_large_oen = 0; + u32 coarse_tune_small = 0; + u32 coarse_tune_small_oen = 0; + u16 dq_oen[DQS_NUMBER] = {0}, dqm_oen[DQS_NUMBER] = {0}; + struct tx_dly_tune dqdly_tune[DQS_NUMBER] = {0}; + struct tx_dly_tune dqmdly_tune[DQS_NUMBER] = {0}; + + for (i = 0; i < DQS_NUMBER; i++) { + dramc_transfer_dly_tune(chn, tx_perbyte_dly[i].final_dly, + &dqdly_tune[i]); + dramc_transfer_dly_tune(chn, dq_precal_result[i], + &dqmdly_tune[i]); + + coarse_tune_large += dqdly_tune[i].coarse_tune_large << (i * 4); + coarse_tune_large_oen += + dqdly_tune[i].coarse_tune_large_oen << (i * 4); + coarse_tune_small += dqdly_tune[i].coarse_tune_small << (i * 4); + coarse_tune_small_oen += + dqdly_tune[i].coarse_tune_small_oen << (i * 4); + + dq_oen[i] = (dqdly_tune[i].coarse_tune_large_oen << 3) + + (dqdly_tune[i].coarse_tune_small_oen << 5) + + dqdly_tune[i].fine_tune; + dqm_oen[i] = (dqmdly_tune[i].coarse_tune_large_oen << 3) + + (dqmdly_tune[i].coarse_tune_small_oen << 5) + + dqmdly_tune[i].fine_tune; + } + + for (rank = rank_start; rank < RANK_MAX; rank++) { + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[0], + (coarse_tune_large_oen << 16) | coarse_tune_large); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[2], + (coarse_tune_small_oen << 16) | coarse_tune_small); + 
/*
 * NOTE(review): tail of set_tx_best_dly_factor (mirrors the DQ coarse taps
 * into the DQM SELPH registers and programs per-byte fine-tune), then
 * set_rx_best_dly_factor, the per-bit setup/hold balancer
 * dramc_calc_best_dly, the RX/TX dispatch dramc_set_dqdqs_dly, the TX
 * result applier dramc_set_tx_best_dly, and the head of
 * dramc_set_rx_best_dly.
 * NOTE(review): dq_oen[]/dqm_oen[] (computed in the span above) are never
 * written to any register — dead stores; either program them or drop them.
 * NOTE(review): in dramc_calc_best_dly(), the hold > setup branch pairs
 * `(setup != 0)` with the plain `(hold - setup) / 2` and the setup == 0
 * case with `+ p->best_first_dqsdly`, while the hold < setup branch is the
 * mirror image with `(hold != 0)`.  The condition/branch pairing looks
 * inverted relative to the mirror case — confirm against the vendor
 * reference code before relying on it.
 * NOTE(review): dramc_set_tx_best_dly(): the 1000000 / (16 * 64) factor
 * converts PI taps to delay-cell counts via params->delay_cell_unit —
 * presumably ps-based fixed point; worth a named constant.
 */
write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[1], + (coarse_tune_large_oen << 16) | coarse_tune_large); + write32(&ch[chn].ao.shu[0].rk[rank].selph_dq[3], + (coarse_tune_small_oen << 16) | coarse_tune_small); + } + + for (rank = rank_start; rank < RANK_MAX; rank++) + for (u8 b = 0; b < 2; b++) + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].b[b].dq[7], + FINE_TUNE_DQ_MASK | FINE_TUNE_DQM_MASK, + (dqdly_tune[b].fine_tune << + FINE_TUNE_DQ_SHIFT) | + (dqmdly_tune[b].fine_tune << + FINE_TUNE_DQM_SHIFT)); +} + +static void set_rx_best_dly_factor(u8 chn, u8 rank, + struct dqdqs_perbit_dly *dqdqs_perbit_dly, + u32 *max_dqsdly_byte, u32 *ave_dqm_dly) +{ + u32 i, value = 0; + u8 index, byte; + + for (i = 0; i < DQS_NUMBER; i++) { + value = (max_dqsdly_byte[i] << 24) | + (max_dqsdly_byte[i] << 16) | + (ave_dqm_dly[i] << 8) | (ave_dqm_dly[i] << 0); + + /* Delay dqs/dqm */ + write32(&ch[chn].phy.shu[0].rk[rank].b[i].dq[6], value); + } + dram_phy_reset(chn); + + value = 0; + for (i = 0; i < DQ_DATA_WIDTH; i += 2) { + byte = i / DQS_BIT_NUMBER; + index = 2 + ((i % 8) * 2) / 4; + value = dqdqs_perbit_dly[i + 1].best_dqdly << 24 | + dqdqs_perbit_dly[i + 1].best_dqdly << 16 | + dqdqs_perbit_dly[i].best_dqdly << 8 | + dqdqs_perbit_dly[i].best_dqdly; + write32(&ch[chn].phy.shu[0].rk[rank].b[byte].dq[index], value); + } +} + +static u8 dramc_calc_best_dly(u8 bit, + struct dqdqs_perbit_dly *p, u32 *p_max_byte) +{ + u8 fail = 0, hold, setup; + + hold = p->best_last_dqsdly - p->best_first_dqsdly + 1; + setup = p->best_last_dqdly - p->best_first_dqdly + 1; + + if (hold > setup) { + p->best_dqdly = 0; + p->best_dqsdly = (setup != 0) ? (hold - setup) / 2 : + (hold - setup) / 2 + p->best_first_dqsdly; + + if (p->best_dqsdly > *p_max_byte) + *p_max_byte = p->best_dqsdly; + + } else if (hold < setup) { + p->best_dqsdly = 0; + p->best_dqdly = (hold != 0) ? 
(setup - hold) / 2 : + (setup - hold) / 2 + p->best_first_dqdly; + + } else { /* hold time == setup time */ + p->best_dqsdly = 0; + p->best_dqdly = 0; + + if (hold == 0) { + dramc_dbg("ERROR, error bit %d, " + "setup_time = hold_time = 0\n", bit); + fail = 1; + } + } + + dramc_dbg("bit#%d : dq =%d dqs=%d win=%d (%d, %d)\n", + bit, setup, hold, setup + hold, p->best_dqdly, p->best_dqsdly); + + return fail; +} + +static void dramc_set_dqdqs_dly(u8 chn, u8 rank, u8 cal_type, s32 dly) +{ + if ((cal_type == RX_WIN_RD_DQC) || (cal_type == RX_WIN_TEST_ENG)) + dramc_set_rx_dly(chn, rank, dly); + else + set_tx_dly_factor(chn, rank, cal_type, dly); +} + +static void dramc_set_tx_best_dly(u8 chn, u8 rank, + struct dqdqs_perbit_dly *tx_dly, u16 *tx_dq_precal_result, + const struct sdram_params *params) +{ + s16 dq_win_center[DQ_DATA_WIDTH]; + u16 pi_diff; + u32 byte_dly_cell[DQS_NUMBER] = {0, 0}; + struct per_byte_dly tx_perbyte_dly[DQS_NUMBER]; + u16 dly_cell_unit = params->delay_cell_unit; + int i, index, bit; + u16 dq_delay_cell[DQ_DATA_WIDTH]; + + for (i = 0; i < DQS_NUMBER; i++) { + tx_perbyte_dly[i].min_center = 0xffff; + tx_perbyte_dly[i].max_center = 0; + } + + for (i = 0; i < DQ_DATA_WIDTH; i++) { + index = i / DQS_BIT_NUMBER; + dramc_calc_tx_perbyte_dly(&tx_dly[i], + &dq_win_center[i], &tx_perbyte_dly[index]); + } + + for (i = 0; i < DQS_NUMBER; i++) { + tx_perbyte_dly[i].final_dly = tx_perbyte_dly[i].min_center; + tx_dq_precal_result[i] = (tx_perbyte_dly[i].max_center + + tx_perbyte_dly[i].min_center) >> 1; + + for (bit = 0; bit < DQS_BIT_NUMBER; bit++) { + pi_diff = dq_win_center[i * 8 + bit] + - tx_perbyte_dly[i].min_center; + dq_delay_cell[i * 8 + bit] = + ((pi_diff * 1000000) / (16 * 64)) + / dly_cell_unit; + byte_dly_cell[i] |= + (dq_delay_cell[i * 8 + bit] << (bit * 4)); + } + + write32(&ch[chn].phy.shu[0].rk[rank].b[i].dq[0], + byte_dly_cell[i]); + } + + set_tx_best_dly_factor(chn, rank, tx_perbyte_dly, tx_dq_precal_result); +} + +static int 
/*
 * NOTE(review): body of dramc_set_rx_best_dly (equalizes per-byte DQS delay
 * and compensates each DQ bit, averaging into a per-byte DQM delay), the
 * vref-scan property selector dramc_get_vref_prop, the test-engine pattern
 * programmer dramc_engine2_setpat, and the head of the window-measurement
 * dispatcher dram_k_perbit.
 * NOTE(review): max_dqsdly_byte/ave_dqmdly_byte are `static` but fully
 * re-initialized on every call — the qualifier only moves them to .bss;
 * plain locals would be clearer (presumably static to save romstage stack —
 * confirm).
 * NOTE(review): dramc_get_vref_prop() leaves *vref_begin/*vref_end untouched
 * when the scan is disabled; callers must pre-initialize them (the one
 * caller does: begin=0, end=1).
 * NOTE(review): dramc_engine2_setpat(): magic pattern constants
 * (0x11 << 8) | (0xd << 0) | (0x1 << 14) for TESTAUDINIT/TESTAUDINC deserve
 * named values; also (0x1 << 14) falls outside the two cleared masks —
 * verify the intended field.
 */
dramc_set_rx_best_dly(u8 chn, u8 rank, + struct dqdqs_perbit_dly *rx_dly) +{ + s16 dly; + s8 fail = 0; + u8 i, index, max_limit; + static u32 max_dqsdly_byte[DQS_NUMBER]; + static u32 ave_dqmdly_byte[DQS_NUMBER]; + + for (i = 0; i < DQS_NUMBER; i++) { + max_dqsdly_byte[i] = 0; + ave_dqmdly_byte[i] = 0; + } + + for (i = 0; i < DQ_DATA_WIDTH; i++) { + index = i / DQS_BIT_NUMBER; + fail |= dramc_calc_best_dly(i, &rx_dly[i], + &max_dqsdly_byte[index]); + } + + for (i = 0; i < DQ_DATA_WIDTH; i++) { + index = i / DQS_BIT_NUMBER; + /* set DQS to max for 8-bit */ + if (rx_dly[i].best_dqsdly < max_dqsdly_byte[index]) { + /* Delay DQ to compensate extra DQS delay */ + dly = max_dqsdly_byte[index] - rx_dly[i].best_dqsdly; + rx_dly[i].best_dqdly += dly; + max_limit = MAX_DQDLY_TAPS - 1; + if (rx_dly[i].best_dqdly > max_limit) + rx_dly[i].best_dqdly = max_limit; + } + + ave_dqmdly_byte[index] += rx_dly[i].best_dqdly; + if ((i + 1) % DQS_BIT_NUMBER == 0) + ave_dqmdly_byte[index] /= DQS_BIT_NUMBER; + } + + if (fail == 1) { + dramc_dbg("FAIL on perbit_window_cal()\n"); + return -1; + } + + set_rx_best_dly_factor(chn, rank, rx_dly, max_dqsdly_byte, + ave_dqmdly_byte); + return 0; +} + +static void dramc_get_vref_prop(u8 rank, u8 cal_type, + u8 *vref_scan_en, u8 *vref_begin, u8 *vref_end) +{ + if ((cal_type == RX_WIN_TEST_ENG) && (rank == RANK_0)) { + *vref_scan_en = 1; + *vref_begin = RX_VREF_BEGIN; + *vref_end = RX_VREF_END; + } else if (cal_type == TX_WIN_DQ_ONLY) { + *vref_scan_en = 1; + *vref_begin = TX_VREF_BEGIN; + *vref_end = TX_VREF_END; + } else + *vref_scan_en = 0; +} + +static void dramc_engine2_setpat(u8 chn, u8 testaudpat) +{ + const u32 log2loopcount = 0; + const u32 len1 = 0; + + clrbits_le32(&ch[chn].ao.test2_4, + 0x1 << TEST2_4_TESTXTALKPAT_SHIFT | + 0x1 << TEST2_4_TESTAUDMODE_SHIFT | + 0x1 << TEST2_4_TESTAUDBITINV_SHIFT); + + if (testaudpat == 0) { + setbits_le32(&ch[chn].ao.perfctl0, 1 << PERFCTL0_RWOFOEN_SHIFT); + + clrsetbits_le32(&ch[chn].ao.test2_4, + 0x1 << 
TEST2_4_TEST_REQ_LEN1_SHIFT | + 0x1 << TEST2_4_TESTSSOPAT_SHIFT | + 0x1 << TEST2_4_TESTSSOXTALKPAT_SHIFT | + 0x1 << TEST2_4_TESTXTALKPAT_SHIFT, + len1 << TEST2_4_TEST_REQ_LEN1_SHIFT); + } else if (testaudpat == 1) { + clrsetbits_le32(&ch[chn].ao.test2_4, + TEST2_4_TESTAUDINIT_MASK | TEST2_4_TESTAUDINC_MASK, + (0x11 << 8) | (0xd << 0) | (0x1 << 14)); + } + clrsetbits_le32(&ch[chn].ao.test2_3, + 0x1 << TEST2_3_TESTAUDPAT_SHIFT | TEST2_3_TESTCNT_MASK, + (testaudpat ? 1 : 0) << TEST2_3_TESTAUDPAT_SHIFT | + log2loopcount << 0); +} + +static u32 dram_k_perbit(u8 chn, u8 cal_type) +{ + u32 err_value = 0x0; + + if (cal_type == RX_WIN_RD_DQC) { + err_value = dramc_rd_dqc_run(chn); + } else { + dramc_engine2_setpat(chn, 1); + err_value = dramc_engine2_run(chn, TE_OP_WRITE_READ_CHECK); + dramc_engine2_setpat(chn, 0); + err_value |= dramc_engine2_run(chn, TE_OP_WRITE_READ_CHECK); + } + return err_value; +} + +static u8 dramc_window_perbit_cal(u8 chn, u8 rank, + u8 type, const struct sdram_params *params) +{ + u8 vref = 0, vref_begin = 0, vref_end = 1, vref_scan_enable = 0; + s16 dly, dly_begin = 0, dly_end = 0; + u8 i, bit, dly_step = 1; + u32 dummy_rd_backup = 0; + static u16 dq_precal_result[DQS_NUMBER]; + u32 err_value, fail_bit; + static struct vref_perbit_dly vref_dly; + struct dqdqs_perbit_dly dq_perbit_dly[DQ_DATA_WIDTH]; + + dramc_get_vref_prop(rank, type, + &vref_scan_enable, &vref_begin, &vref_end); + if (vref_scan_enable && (type == RX_WIN_RD_DQC)) + dramc_rx_vref_enable(chn); + + dramc_dbg("%s [Rank %d] [Channel %d] cal_type:%d,vref_enable:%d\n", + __func__, rank, chn, type, vref_scan_enable); + + if ((type == TX_WIN_DQ_ONLY) || (type == TX_WIN_DQ_DQM)) { + for (i = 0; i < 2; i++) { + write32(&ch[chn].phy.shu[0].rk[rank].b[i].dq[0], 0); + clrbits_le32(&ch[chn].phy.shu[0].rk[rank].b[i].dq[1], + 0xf); + } + setbits_le32(&ch[chn].phy.misc_ctrl1, + 0x1 << MISC_CTRL1_R_DMAR_FINE_TUNE_DQ_SW_SHIFT); + setbits_le32(&ch[chn].ao.dqsoscr, + 0x1 << 
/*
 * NOTE(review): body of dramc_window_perbit_cal — the vref × delay sweep
 * that records per-bit pass windows, picks the best vref, applies the best
 * RX or TX delays, and the new calls wiring the four calibration passes
 * (RX_WIN_RD_DQC, TX_WIN_DQ_DQM, TX_WIN_DQ_ONLY, RX_WIN_TEST_ENG) into the
 * init sequence.
 * NOTE(review): the vref loop advances with `vref += dly_step`; the
 * RX_VREF_STEP / TX_VREF_STEP constants defined earlier in this patch are
 * never used — presumably one of them was intended here.
 * NOTE(review): for positive delays, dramc_check_dqdqs_win() is called with
 * last_step = dly_end, but the sweep runs `dly < dly_end`, so the
 * "pass to the last tap" branch (dly_step == last_step) can never fire — a
 * window still passing at the end of the TX sweep is silently dropped.
 * Verify whether the loop should be `dly <= dly_end` or last_step should be
 * dly_end - 1.
 * NOTE(review): dq_precal_result is static so the TX_WIN_DQ_DQM result
 * seeds the TX_WIN_DQ_ONLY range via dramc_get_dly_range() — the call
 * order added at the bottom of this hunk depends on this.
 * NOTE(review): dummy_rd is backed up/restored only for the engine2 paths,
 * matching the RDDQC init/end pairing.
 */
DQSOSCR_AR_COARSE_TUNE_DQ_SW_SHIFT); + } + + if (type == RX_WIN_RD_DQC) { + dramc_rd_dqc_init(chn, rank); + } else { + dummy_rd_backup = read32(&ch[chn].ao.dummy_rd); + dramc_engine2_init(chn, rank, 0x400, 0); + } + + vref_dly.max_win = 0; + vref_dly.min_win = 0xffff; + for (vref = vref_begin; vref < vref_end; vref += dly_step) { + vref_dly.vref = vref; + for (i = 0; i < DQ_DATA_WIDTH; i++) { + dq_perbit_dly[i].first_dqdly = -1; + dq_perbit_dly[i].last_dqdly = -2; + dq_perbit_dly[i].first_dqsdly = -1; + dq_perbit_dly[i].last_dqsdly = -2; + dq_perbit_dly[i].best_first_dqdly = -1; + dq_perbit_dly[i].best_last_dqdly = -2; + dq_perbit_dly[i].best_first_dqsdly = -1; + dq_perbit_dly[i].best_last_dqsdly = -2; + } + + if (vref_scan_enable) + dramc_set_vref(chn, rank, type, vref_dly.vref); + + if ((type == RX_WIN_RD_DQC) || (type == RX_WIN_TEST_ENG)) { + set_rx_dly_factor(chn, rank, RX_DQM, FIRST_DQ_DELAY); + set_rx_dly_factor(chn, rank, RX_DQ, FIRST_DQ_DELAY); + } + + dramc_get_dly_range(chn, rank, type, dq_precal_result, + &dly_begin, &dly_end, params); + for (dly = dly_begin; dly < dly_end; dly += dly_step) { + dramc_set_dqdqs_dly(chn, rank, type, dly); + err_value = dram_k_perbit(chn, type); + if (!vref_scan_enable) + dramc_dbg("%d ", dly); + + for (bit = 0; bit < DQ_DATA_WIDTH; bit++) { + fail_bit = err_value & ((u32) 1 << bit); + if (dly < 0) + dramc_check_dqdqs_win( + &(dq_perbit_dly[bit]), + -dly, -FIRST_DQS_DELAY, + fail_bit, 0); + else + dramc_check_dqdqs_win( + &(dq_perbit_dly[bit]), + dly, dly_end, fail_bit, 1); + if (!vref_scan_enable) { + if (fail_bit == 0) + dramc_dbg("o"); + else + dramc_dbg("x"); + } + } + + if (!vref_scan_enable) + dramc_dbg(" [MSB]\n"); + } + + for (bit = 0; bit < DQ_DATA_WIDTH; bit++) + dramc_dbg("dq[%d] win(%d ~ %d)\n", bit, + dq_perbit_dly[bit].best_first_dqdly, + dq_perbit_dly[bit].best_last_dqdly); + + if (dramk_calc_best_vref(type, vref, &vref_dly, dq_perbit_dly)) + break; + } + + if (type == RX_WIN_RD_DQC) { + 
dramc_rd_dqc_end(chn); + } else { + dramc_engine2_end(chn); + write32(&ch[chn].ao.dummy_rd, dummy_rd_backup); + } + + if (vref_scan_enable) + dramc_set_vref(chn, rank, type, vref_dly.vref); + + if ((type == RX_WIN_RD_DQC) || (type == RX_WIN_TEST_ENG)) + dramc_set_rx_best_dly(chn, rank, vref_dly.perbit_dly); + else + dramc_set_tx_best_dly(chn, rank, vref_dly.perbit_dly, + dq_precal_result, params); + return 0; +} + static void dramc_rx_dqs_gating_post_process(u8 chn) { u8 dqs, rank_rx_dvs, dqsinctl, rank; @@ -971,6 +1809,10 @@ dramc_write_leveling(chn, rk, pams->wr_level); auto_refresh_switch(chn, 1); dramc_rx_dqs_gating_cal(chn, rk); + dramc_window_perbit_cal(chn, rk, RX_WIN_RD_DQC, pams); + dramc_window_perbit_cal(chn, rk, TX_WIN_DQ_DQM, pams); + dramc_window_perbit_cal(chn, rk, TX_WIN_DQ_ONLY, pams); + dramc_window_perbit_cal(chn, rk, RX_WIN_TEST_ENG, pams); }
/*
 * NOTE(review): header diff — adds TX_DQ_COARSE_TUNE_TO_FINE_TUNE_TAP (64).
 * The .c code uses it both as a divisor and as a mask
 * (`dly & (TAP - 1)`), so it must remain a power of two; worth a comment
 * (or a BUILD_TIME_ASSERT) in the header.
 */
 dramc_rx_dqs_gating_post_process(chn) diff --git a/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h b/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h index 761fff6..a67e16d 100644 --- a/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h +++ b/src/soc/mediatek/mt8183/include/soc/dramc_pi_api.h @@ -38,6 +38,7 @@ #define DRAMC_BROADCAST_ON 0x1f #define DRAMC_BROADCAST_OFF 0x0 #define MAX_BACKUP_REG_CNT 32 +#define TX_DQ_COARSE_TUNE_TO_FINE_TUNE_TAP 64
/*
 * NOTE(review): memlayout diff — grows the romstage/decompressor overlap
 * region from 92K to 110K, presumably to make room for the ~840 lines of
 * calibration code this change adds; confirm the new size still fits below
 * BOOTBLOCK at 0x00227000 (0x201000 + 110K = 0x21C800, OK).
 * NOTE(review): the start address literal 0x000201000 has nine hex digits
 * (one extra leading zero) — harmless but worth normalizing to 0x00201000.
 */
#define IMP_LP4X_TERM_VREF_SEL 0x1b #define IMP_DRVP_LP4X_UNTERM_VREF_SEL 0x1a diff --git a/src/soc/mediatek/mt8183/include/soc/memlayout.ld b/src/soc/mediatek/mt8183/include/soc/memlayout.ld index f148eed..2a6d42d 100644 --- a/src/soc/mediatek/mt8183/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8183/include/soc/memlayout.ld @@ -39,7 +39,7 @@ SRAM_END(0x00120000)
SRAM_L2C_START(0x00200000) - OVERLAP_DECOMPRESSOR_ROMSTAGE(0x000201000, 92K) + OVERLAP_DECOMPRESSOR_ROMSTAGE(0x000201000, 110K) BOOTBLOCK(0x00227000, 89K) VERSTAGE(0x0023E000, 114K) SRAM_L2C_END(0x00280000)