Bring from coreboot v1 support for initializing L2 cache on Slot 1 Pentium II/III CPUs, code names Klamath, Deschutes and Katmai. Build tested on ASUS P2B-LS and P3B-F. Boot tested on P2B-LS with Pentium III 600MHz, Katmai core. Also add missing include of model_68x in slot_1, to address a similar problem seen before r5945. Signed-off-by: Keith Hui Acked-by: Roger Zauner --- Index: src/include/cpu/intel/l2_cache.h =================================================================== --- src/include/cpu/intel/l2_cache.h (revision 0) +++ src/include/cpu/intel/l2_cache.h (revision 0) @@ -0,0 +1,102 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2010 Keith Hui + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +/* The L2 cache definitions here only apply to SECC/SECC2 P6 family CPUs + * with Klamath (63x), Deschutes (65x) and Katmai (67x) cores. + * It is not required for Coppermine (68x) and Tualatin (6bx) cores. + * It is currently not known if Celerons with Mendocino core require + * the special initialization. + * Covington-core Celerons do not have L2 cache. + */ + +/* This is a straight port from coreboot v1. 
*/ + +#ifndef __P6_L2_CACHE_H +#define __P6_L2_CACHE_H + +#define IA32_PLATFORM_ID 0x17 +#define EBL_CR_POWERON 0x2A + +#define BBL_CR_D0 0x88 +#define BBL_CR_D1 0x89 +#define BBL_CR_D2 0x8A +#define BBL_CR_D3 0x8B + +#define BBL_CR_ADDR 0x116 +#define BBL_CR_DECC 0x118 +#define BBL_CR_CTL 0x119 +#define BBL_CR_TRIG 0x11A +#define BBL_CR_BUSY 0x11B +#define BBL_CR_CTL3 0x11E + +#define BBLCR3_L2_CONFIGURED (1<<0) +/* bits [4:1] */ +#define BBLCR3_L2_LATENCY 0x1e +#define BBLCR3_L2_ECC_CHECK_ENABLE (1<<5) +#define BBLCR3_L2_ADDR_PARITY_ENABLE (1<<6) +#define BBLCR3_L2_CRTN_PARITY_ENABLE (1<<7) +#define BBLCR3_L2_ENABLED (1<<8) +/* bits [17:13] */ +#define BBLCR3_L2_SIZE (0x1f << 13) +#define BBLCR3_L2_SIZE_256K (0x01 << 13) +#define BBLCR3_L2_SIZE_512K (0x02 << 13) +#define BBLCR3_L2_SIZE_1M (0x04 << 13) +#define BBLCR3_L2_SIZE_2M (0x08 << 13) +#define BBLCR3_L2_SIZE_4M (0x10 << 13) +/* bits [22:20] */ +#define BBLCR3_L2_PHYSICAL_RANGE (0x7 << 20) +/* TODO: This bitmask does not agree with Intel's documentation. + * Get confirmation one way or another. + */ +#define BBLCR3_L2_SUPPLIED_ECC 0x40000 + +#define BBLCR3_L2_HARDWARE_DISABLE (1<<23) +/* Also known as... 
*/ +#define BBLCR3_L2_NOT_PRESENT (1<<23) + +/* L2 commands */ +#define L2CMD_RLU 0x0c /* 01100 Data read w/ LRU update */ +#define L2CMD_TRR 0x0e /* 01110 Tag read with data read */ +#define L2CMD_TI 0x0f /* 01111 Tag inquiry */ +#define L2CMD_CR 0x02 /* 00010 L2 control register read */ +#define L2CMD_CW 0x03 /* 00011 L2 control register write */ +#define L2CMD_TWR 0x08 /* 010-- Tag write w/ data read */ +#define L2CMD_TWW 0x1c /* 111-- Tag write w/ data write */ +#define L2CMD_TW 0x10 /* 100-- Tag write */ +/* MESI encode for L2 commands above */ +#define L2CMD_MESI_M 3 +#define L2CMD_MESI_E 2 +#define L2CMD_MESI_S 1 +#define L2CMD_MESI_I 0 + +extern int calculate_l2_latency(void); +extern int signal_l2(u32 address_low, u32 data_high, u32 data_low, int way, u8 command); +extern int read_l2(u32 address); +extern int write_l2(u32 address, u32 data); +extern int test_l2_address_alias(u32 address1, u32 address2, + u32 data_high, u32 data_low); +extern int calculate_l2_cache_size(void); +extern int calculate_l2_physical_address_range(void); +extern int set_l2_ecc(void); + +extern int p6_configure_l2_cache(void); + +#endif /* __P6_L2_CACHE_H */ Index: src/cpu/intel/model_65x/model_65x_init.c =================================================================== --- src/cpu/intel/model_65x/model_65x_init.c (revision 6247) +++ src/cpu/intel/model_65x/model_65x_init.c (working copy) @@ -27,6 +27,7 @@ #include #include #include +#include static u32 microcode_updates[] = { #include "microcode-410-MU16522d.h" @@ -64,6 +65,8 @@ /* Update the microcode */ intel_update_microcode(microcode_updates); + p6_configure_l2_cache(); + /* Enable the local cpu apics */ setup_lapic(); }; Index: src/cpu/intel/model_67x/model_67x_init.c =================================================================== --- src/cpu/intel/model_67x/model_67x_init.c (revision 6247) +++ src/cpu/intel/model_67x/model_67x_init.c (working copy) @@ -26,6 +26,7 @@ #include #include #include +#include static const 
uint32_t microcode_updates[] = { /* Include microcode updates here. */ @@ -46,6 +47,9 @@ /* Update the microcode */ intel_update_microcode(microcode_updates); + /* Initialize L2 cache */ + p6_configure_l2_cache(); + /* Turn on caching if we haven't already */ x86_enable_cache(); Index: src/cpu/intel/slot_1/l2_cache.c =================================================================== --- src/cpu/intel/slot_1/l2_cache.c (revision 0) +++ src/cpu/intel/slot_1/l2_cache.c (revision 0) @@ -0,0 +1,810 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2000 Denis Dowling + * Copyright (C) 2010 Keith Hui + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Intel Pentium L2 Cache initialization. + * This code was developed by reverse engineering + * the BIOS. Where the code accesses documented + * registers I have added comments as best I can. + * Some undocumented registers on the Pentium II are + * used so some of the documentation is incomplete + * + * References: + * Intel Architecture Software Developer's Manual + * Volume 3B: System Programming Guide, Part 2 (#253669) + * Appendix B.9 + */ + +/* This code is ported from coreboot v1. 
+ * The L2 cache initialization sequence here only applies to SECC/SECC2 P6 family + * CPUs with Klamath (63x), Deschutes (65x) and Katmai (67x) cores. + * It is not required for Coppermine (68x) and Tualatin (6bx) cores. + * It is currently not known if Celerons with Mendocino (66x) core require the + * special initialization. + * Covington-core Celerons do not have L2 cache. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Latency Tables */ +struct latency_entry { + u8 key; + u8 value; +}; +/* +Latency maps for Deschutes and Katmai. +No such mapping is available for Klamath. + +Cache latency to +be written to L2 -----++++ +control register |||| +0000 xx 00 -----> 000 cccc 0 +|||| 00 66MHz +|||| 10 100MHz +|||| 01 133MHz (Katmai "B" only) +++++------ CPU frequency multiplier + +0000 2x +0001 3x +0010 4x +0011 5x +0100 2.5x +0101 3.5x +0110 4.5x +0111 5.5x +1000 6x +1001 7x +1010 8x +1011 Reserved +1100 6.5x +1101 7.5x +1110 1.5x +1111 2x + +*/ +static const struct latency_entry latency_650_t0[] = { + {0x10, 0x02}, {0x50, 0x02}, {0x20, 0x04}, {0x60, 0x06}, + {0x00, 0x08}, {0x40, 0x0C}, {0x12, 0x06}, {0x52, 0x0A}, + {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0xFF, 0x00} +}; + +static const struct latency_entry latency_650_t1[] = { + {0x12, 0x14}, {0x52, 0x16}, {0x22, 0x16}, {0x62, 0x16}, + {0xFF, 0x00} +}; + +static const struct latency_entry latency_670_t0[] = { + {0x60, 0x06}, {0x00, 0x08}, {0x12, 0x06}, {0x52, 0x0A}, + {0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0x42, 0x02}, + {0x11, 0x0E}, {0x51, 0x0C}, {0x21, 0x02}, {0x61, 0x10}, + {0x01, 0x10}, {0x41, 0x02}, {0xFF, 0x00} +}; + +static const struct latency_entry latency_670_t1[] = { + {0x22, 0x18}, {0x62, 0x18}, {0x02, 0x1A}, {0x11, 0x18}, + {0xFF, 0x00} +}; + +static const struct latency_entry latency_670_t2[] = { + {0x22, 0x12}, {0x62, 0x14}, {0x02, 0x16}, {0x42, 0x1E}, + {0x11, 0x12}, {0x51, 0x16}, {0x21, 0x1E}, {0x61, 0x14}, + {0x01, 0x16}, {0x41, 0x1E}, {0xFF, 0x00} 
+}; + +/* Latency tables for 650 model/type */ +static const struct latency_entry *latency_650[] = { + latency_650_t0, latency_650_t1, latency_650_t1 +}; + +/* Latency tables for 670 model/type */ +static const struct latency_entry *latency_670[] = { + latency_670_t0, latency_670_t1, latency_670_t2 +}; + +int calculate_l2_latency(void) +{ + u32 eax, l, signature; + const struct latency_entry *latency_table, *le; + msr_t msr; + + /* First, attempt to get cache latency value from + IA32_PLATFORM_ID[56:53]. (L2 Cache Latency Read) + */ + msr = rdmsr(IA32_PLATFORM_ID); + + printk(BIOS_DEBUG,"rdmsr(IA32_PLATFORM_ID) = %x:%x\n", msr.hi, msr.lo); + + l = (msr.hi >> 20) & 0x1e; + + if (l == 0) { + /* If latency value isn't available from + IA32_PLATFORM_ID[56:53], read it from + L2 control register 0 for lookup from + tables. */ + int t, a; + + /* The raw code is read from L2 register 0, bits [7:4]. */ + a = read_l2(0); + if (a < 0) + return -1; + + a &= 0xf0; + + if ((a & 0x20) == 0) + t = 0; + else if (a == 0x20) + t = 1; + else if (a == 0x30) + t = 2; + else + return -1; + + printk(BIOS_DEBUG,"L2 latency type = %x\n", t); + + /* Get CPUID family/model */ + signature = cpuid_eax(1) & 0xfff0; + + /* Read EBL_CR_POWERON */ + msr = rdmsr(EBL_CR_POWERON); + /* Get clock multiplier and FSB frequency. + * Multiplier is in [25:22]. + * FSB is in [19:18] in Katmai, [19] in Deschutes ([18] is zero for them). 
*/ + eax = msr.lo >> 18; + if (signature == 0x650) { + eax &= 0xf2; /* keep multiplier [7:4] and FSB bit [1] to form the table key */ + latency_table = latency_650[t]; + } else if (signature == 0x670) { + eax &= 0xf3; /* keep multiplier [7:4] and FSB bits [1:0] to form the table key */ + latency_table = latency_670[t]; + } else + return -1; + + /* Search table for matching entry */ + for (le = latency_table; le->key != eax; le++) { + /* Fail if we get to the end of the table */ + if (le->key == 0xff) { + printk(BIOS_DEBUG, "Could not find key %02x in latency table\n", eax); + return -1; + } + } + + l = le->value; + } + + printk(BIOS_DEBUG,"L2 Cache latency is %d\n", l / 2); + + /* Writes the calculated latency in BBL_CR_CTL3[4:1]. */ + msr = rdmsr(BBL_CR_CTL3); + msr.lo &= 0xffffffe1; + msr.lo |= l; + wrmsr(BBL_CR_CTL3, msr); + + return 0; +} + + +/* Setup address, data_high:data_low into the L2 + * control registers and then issue command with correct cache way + */ +int signal_l2(u32 address, u32 data_high, u32 data_low, int way, u8 command) +{ + int i; + msr_t msr; + + /* Write L2 Address to BBL_CR_ADDR */ + msr.lo = address; + msr.hi = 0; + wrmsr(BBL_CR_ADDR, msr); + + /* Write data to BBL_CR_D{0..3} */ + msr.lo = data_low; + msr.hi = data_high; + for (i = BBL_CR_D0; i <= BBL_CR_D3; i++) { + wrmsr(i, msr); + } + + /* Put the command and way into BBL_CR_CTL */ + msr = rdmsr(BBL_CR_CTL); + msr.lo = (msr.lo & 0xfffffce0) | command | (way << 8); + wrmsr(BBL_CR_CTL, msr); + + /* Trigger L2 controller */ + msr.lo = 0; + msr.hi = 0; + wrmsr(BBL_CR_TRIG, msr); + + /* Poll the controller to see when done */ + for (i = 0; i < 0x100; i++) { + /* Read BBL_CR_BUSY */ + msr = rdmsr(BBL_CR_BUSY); + /* If not busy then return */ + if ((msr.lo & 1) == 0) + return 0; + } + + /* Return timeout code */ + return -1; +} + +/* Read the L2 Cache controller register at given address */ +int read_l2(u32 address) +{ + msr_t msr; + + /* Send a L2 Control Register Read to L2 controller */ + if (signal_l2(address << 5, 0, 0, 0, L2CMD_CR) != 0) + return -1; + + /* If OK then get the result from BBL_CR_ADDR */ + msr = 
rdmsr(BBL_CR_ADDR); + return (msr.lo >> 0x15); + +} + +/* Write data into the L2 controller register at address */ +int write_l2(u32 address, u32 data) +{ + int v1, v2, i; + + v1 = read_l2(0); + if (v1 < 0) + return -1; + + v2 = read_l2(2); + if (v2 < 0) + return -1; + + if ((v1 & 0x20) == 0) { + v2 &= 0x3; + v2++; + } else + v2 &= 0x7; + + /* This write has to be replicated to a number of places. Not sure what. */ + + for (i = 0; i < v2; i++) { + + u32 data1, data2; + // Bits legend + // data1 = ffffffff + // data2 = 000000dc + // address = 00aaaaaa + // Final address signalled: + // 000fffff fff000c0 000dcaaa aaa00000 + data1 = data & 0xff; + data1 = data1 << 21; + data2 = (i << 11) & 0x1800; + data1 |= data2; + data2 <<= 6; + data2 &= 0x20000; + data1 |= data2; + + /* Signal L2 controller */ + if (signal_l2((address << 5) | data1, 0, 0, 0, 3)) + return -1; + } + return 0; +} + +/* Write data_high:data_low into the cache at address1. Test address2 + * to see if the same data is returned. Return 0 if the data matches. + * Return the lower 16 bits of the mismatched data on mismatch. Return -1 + * on error. + */ +int test_l2_address_alias(u32 address1, u32 address2, + u32 data_high, u32 data_low) +{ + int d; + msr_t msr; + + /* Tag Write with Data Write for L2 */ + if (signal_l2(address1, data_high, data_low, 0, L2CMD_TWW)) + return -1; + + /* Tag Read with Data Read for L2 */ + if (signal_l2(address2, 0, 0, 0, L2CMD_TRR)) + return -1; + + /* Read data from BBL_CR_D[0-3] */ + for (d = BBL_CR_D0; d <= BBL_CR_D3; d++) { + msr = rdmsr(d); + if (msr.lo != data_low || msr.hi != data_high) + return (msr.lo & 0xffff); + } + + return 0; +} + +/* Calculates the L2 cache size. + * + * Reference: Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 3B: System Programming Guide, Part 2, Intel pub. 253669, pg. B-172. 
+ * + */ +int calculate_l2_cache_size(void) +{ + int v; + msr_t msr; + u32 cache_setting; + u32 address, size, eax, bblcr3; + + v = read_l2(0); + if (v < 0) + return -1; + if ((v & 0x20) == 0) { + msr = rdmsr(BBL_CR_CTL3); + bblcr3 = msr.lo & ~BBLCR3_L2_SIZE; + /* + * Successively write in all the possible cache size per bank + * into BBL_CR_CTL3[17:13], starting from 256KB (00001) to 4MB (10000), + * and read the last value written and accepted by the cache. + * + * No idea why these bits are writable at all. + */ + for (cache_setting = BBLCR3_L2_SIZE_256K; + cache_setting <= BBLCR3_L2_SIZE_4M; cache_setting <<= 1) { + + eax = bblcr3 | cache_setting; + msr.lo = eax; + wrmsr(BBL_CR_CTL3, msr); + msr = rdmsr(BBL_CR_CTL3); + + /* Value not accepted */ + if (msr.lo != eax) + break; + } + + /* Backtrack to the last value that worked... */ + cache_setting >>= 1; + + /* and write it into BBL_CR_CTL3 */ + msr.lo &= ~BBLCR3_L2_SIZE; + msr.lo |= (cache_setting & BBLCR3_L2_SIZE); + + wrmsr(BBL_CR_CTL3, msr); + + printk(BIOS_DEBUG,"Maximum cache mask is %x\n", cache_setting); + + /* For now, BBL_CR_CTL3 has the highest cache "size" that register + * will accept. Now we'll ping the cache and see where it wraps. + */ + + /* Write aaaaaaaa:aaaaaaaa to address 0 in the l2 cache. + * If this "alias test" returns an "address", it means the + * cache cannot be written to properly, and we have a problem. + */ + v = test_l2_address_alias(0, 0, 0xaaaaaaaa, 0xaaaaaaaa); + if (v != 0) + return -1; + + /* Start with 32K wrap point (256KB actually) */ + size = 1; + address = 0x8000; + + while (1) { + v = test_l2_address_alias(address, 0, 0x55555555, + 0x55555555); + // Write failed. + if (v < 0) + return -1; + // It wraps here. 
+ else if (v == 0) + break; + + size <<= 1; + address <<= 1; + + if (address > 0x200000) + return -1; + } + + /* Mask size */ + size &= 0x3e; + + /* Shift to [17:13] */ + size <<= 12; + + /* Set this into BBL_CR_CTL3 */ + msr = rdmsr(BBL_CR_CTL3); + msr.lo &= ~BBLCR3_L2_SIZE; + msr.lo |= size; + wrmsr(BBL_CR_CTL3, msr); + + printk(BIOS_DEBUG,"L2 Cache Mask is %x\n", size); + + /* Shift to [6:2] */ + size >>= 11; + + v = read_l2(2); + + if (v < 0) + return -1; + + printk(BIOS_DEBUG,"L2(2): %x ", v); + + v &= 0x3; + + /* Shift size right by v */ + size >>= v; + + /* Or in this size */ + v |= size; + + printk(BIOS_DEBUG,"-> %x\n", v); + + if (write_l2(2, v) != 0) + return -1; + } else { + // Some cache size information is available from L2 registers. + // Work from there. + int b, c; + + v = read_l2(2); + + printk(BIOS_DEBUG,"L2(2) = %x\n", v); + + if (v < 0) + return -1; + + // L2 register 2 bitmap: cc---bbb + b = v & 0x7; + c = v >> 6; + + v = 1 << c * b; + + v &= 0xf; + + printk(BIOS_DEBUG,"Calculated a = %x\n", v); + + if (v == 0) + return -1; + + /* Shift to 17:14 */ + v <<= 14; + + /* Write this size into BBL_CR_CTL3 */ + msr = rdmsr(BBL_CR_CTL3); + msr.lo &= ~BBLCR3_L2_SIZE; + msr.lo |= v; + wrmsr(BBL_CR_CTL3, msr); + } + + return 0; +} + +// L2 physical address range can be found from L2 control register 3, bits [2:0]. +int calculate_l2_physical_address_range(void) +{ + int r0, r3; + msr_t msr; + + r3 = read_l2(3); + if (r3 < 0) + return -1; + + r0 = read_l2(0); + if (r0 < 0) + return -1; + + if (r0 & 0x20) + r3 = 0x7; + else + r3 &= 0x7; + + printk(BIOS_DEBUG,"L2 Physical Address Range is %dM\n", (1 << r3) * 512); + + /* Shift into [22:20] to be saved into BBL_CR_CTL3. 
*/ + r3 = r3 << 20; + + msr = rdmsr(BBL_CR_CTL3); + msr.lo &= ~BBLCR3_L2_PHYSICAL_RANGE; + msr.lo |= r3; + wrmsr(BBL_CR_CTL3, msr); + + return 0; +} + +int set_l2_ecc(void) +{ + u32 eax; + const u32 data1 = 0xaa55aa55; + const u32 data2 = 0xaaaaaaaa; + msr_t msr; + + /* Set User Supplied ECC in BBL_CR_CTL */ + msr = rdmsr(BBL_CR_CTL); + msr.lo |= BBLCR3_L2_SUPPLIED_ECC; + wrmsr(BBL_CR_CTL, msr); + + /* Write a value into the L2 Data ECC register BBL_CR_DECC */ + msr.lo = data1; + msr.hi = 0; + wrmsr(BBL_CR_DECC, msr); + + if (test_l2_address_alias(0, 0, data2, data2) < 0) + return -1; + + /* Read back ECC from BBL_CR_DECC */ + msr = rdmsr(BBL_CR_DECC); + eax = msr.lo; + + if (eax == data1) { + printk(BIOS_DEBUG,"L2 ECC Checking is enabled\n"); + + /* Set ECC Check Enable in BBL_CR_CTL3 */ + msr = rdmsr(BBL_CR_CTL3); + msr.lo |= BBLCR3_L2_ECC_CHECK_ENABLE; + wrmsr(BBL_CR_CTL3, msr); + } + + /* Clear User Supplied ECC in BBL_CR_CTL */ + msr = rdmsr(BBL_CR_CTL); + msr.lo &= ~BBLCR3_L2_SUPPLIED_ECC; + wrmsr(BBL_CR_CTL, msr); + + return 0; +} + +/* + * This is the function called from CPU initialization + * driver to set up P6 family L2 cache. + */ + +int p6_configure_l2_cache(void) +{ + msr_t msr, bblctl3; + unsigned int eax; + u16 signature; + int cache_size, bank; + int result, calc_eax; + int v, a; + + int badclk1, badclk2, clkratio; + int crctl3_or; + + printk(BIOS_INFO, "Configuring L2 cache... "); + + /* Read BBL_CR_CTL3 */ + bblctl3 = rdmsr(BBL_CR_CTL3); + /* If bit 23 (L2 Hardware disable) is set then done */ + /* These would be Covington core Celerons with no L2 cache */ + if (bblctl3.lo & BBLCR3_L2_NOT_PRESENT) { + printk(BIOS_INFO,"hardware disabled\n"); + return 0; + } + + signature = cpuid_eax(1) & 0xfff0; + + /* Klamath-specific bit settings for certain + preliminary checks. 
+ */ + if (signature == 0x630) { + clkratio = 0x1c00000; + badclk2 = 0x1000000; + crctl3_or = 0x44000; + } else { + clkratio = 0x3c00000; + badclk2 = 0x3000000; + crctl3_or = 0x40000; + } + badclk1 = 0xc00000; + + /* Read EBL_CR_POWERON */ + msr = rdmsr(EBL_CR_POWERON); + eax = msr.lo; + /* Mask out [22-25] Clock frequency ratio */ + eax &= clkratio; + if (eax == badclk1 || eax == badclk2) { + printk(BIOS_ERR, "Incorrect clock frequency ratio %x\n", eax); + return -1; + } + + disable_cache(); + + /* Mask out from BBL_CR_CTL3: + * [0] L2 Configured + * [5] ECC Check Enable + * [6] Address Parity Check Enable + * [7] CRTN Parity Check Enable + * [8] L2 Enabled + * [12:11] Number of L2 banks + * [17:13] Cache size per bank + * [18] (Set below) + * [22:20] L2 Physical Address Range Support + */ + bblctl3.lo &= 0xff88061e; + /* Set: + * [17:13] = 00010 = 512Kbyte Cache size per bank (63x) + * [17:13] = 00000 = 128Kbyte Cache size per bank (all others) + * [18] Cache state error checking enable + */ + bblctl3.lo |= crctl3_or; + + /* Write BBL_CR_CTL3 */ + wrmsr(BBL_CR_CTL3, bblctl3); + + if (signature != 0x630) { + eax = bblctl3.lo; + + /* Set the l2 latency in BBL_CR_CTL3 */ + if (calculate_l2_latency() != 0) + goto bad; + + /* Read the new latency values back */ + bblctl3 = rdmsr(BBL_CR_CTL3); + calc_eax = bblctl3.lo; + + /* Write back the original default value */ + bblctl3.lo = eax; + wrmsr(BBL_CR_CTL3, bblctl3); + + /* Write BBL_CR_CTL3[27:26] (reserved??) to bits [1:0] of L2 register 4. + * Apparently all other bits must be preserved, hence these code. + */ + + v = (calc_eax >> 26) & 0x3; + + printk(BIOS_DEBUG,"write_l2(4, %x)\n", v); + + a = read_l2(4); + if (a >= 0) + { + a &= 0xfffc; + a |= v; + a = write_l2(4, a); + /* a now contains result code from write_l2() */ + } + if (a != 0) + goto bad; + + /* Restore the correct latency value into BBL_CR_CTL3 */ + bblctl3.lo = calc_eax; + wrmsr(BBL_CR_CTL3, bblctl3); + } /* ! 
63x CPU */ + + /* Read L2 register 0 */ + v = read_l2(0); + + /* If L2(0)[5] set (and can be read properly), enable CRTN and address parity + */ + if (v >= 0 && (v & 0x20)) { + bblctl3 = rdmsr(BBL_CR_CTL3); + bblctl3.lo |= (BBLCR3_L2_ADDR_PARITY_ENABLE | + BBLCR3_L2_CRTN_PARITY_ENABLE); + wrmsr(BBL_CR_CTL3, bblctl3); + } + + /* If something goes wrong at L2 ECC setup, cache ECC + * will just remain disabled. + */ + set_l2_ecc(); + + if (calculate_l2_physical_address_range() != 0) { + printk(BIOS_ERR, "Failed to calculate L2 physical address range\n"); + goto bad; + } + + if (calculate_l2_cache_size() != 0) { + printk(BIOS_ERR, "Failed to calculate L2 cache size\n"); + goto bad; + } + + /* Turn on cache. Only L1 is active at this time. */ + enable_cache(); + + /* Get the calculated cache size from BBL_CR_CTL3[17:13] */ + bblctl3 = rdmsr(BBL_CR_CTL3); + cache_size = (bblctl3.lo & BBLCR3_L2_SIZE); + if (cache_size == 0) + cache_size = 0x1000; + cache_size = cache_size << 3; + + /* TODO: Cache size above is per bank. We're supposed to get + * the number of banks from BBL_CR_CTL3[12:11]. + * Confirm that this still provides the correct answer. + */ + bank = (bblctl3.lo >> 11) & 0x3; + if (bank == 0) + bank = 1; + + printk(BIOS_INFO, "size %dK... ", cache_size * bank * 4 / 1024); + + /* Write to all cache lines to initialize */ + + while (cache_size > 0) { + + /* Each cache line is 32 bytes. */ + cache_size -= 32; + + /* Update each way */ + + /* We're supposed to get L2 associativity from BBL_CR_CTL3[10:9]. + * But this code only applies to certain members of the P6 processor family + * and since all P6 processors have 4-way L2 cache, we can safely assume + * 4 way for all cache operations. 
+ */ + + for (v = 0; v < 4; v++) { + /* Send Tag Write w/Data Write (TWW) to L2 controller + * MESI = Invalid + */ + if (signal_l2(cache_size, 0, 0, v, L2CMD_TWW | L2CMD_MESI_I) != 0) { + printk(BIOS_ERR, "Failed on signal_l2(%x, %x)\n", + cache_size, v); + goto bad; + } + } + } + printk(BIOS_DEBUG, "L2 Cache lines initialized\n"); + + /* Disable cache */ + disable_cache(); + + /* Set L2 cache configured in BBL_CR_CTL3 */ + bblctl3 = rdmsr(BBL_CR_CTL3); + bblctl3.lo |= BBLCR3_L2_CONFIGURED; + wrmsr(BBL_CR_CTL3, bblctl3); + + /* Invalidate cache and discard unsaved writes */ + asm volatile ("invd"); + + /* Write 0 to L2 control register 5 */ + if (write_l2(5, 0) != 0) { + printk(BIOS_ERR,"write_l2(5, 0) failed\n"); + goto done; + } + + bblctl3 = rdmsr(BBL_CR_CTL3); + if (signature == 0x650) { + /* Change the L2 latency to 0101 then back to + * original value. I don't know why this is needed - dpd + */ + eax = bblctl3.lo; + bblctl3.lo &= ~BBLCR3_L2_LATENCY; + bblctl3.lo |= 0x0a; + wrmsr(BBL_CR_CTL3, bblctl3); + bblctl3.lo = eax; + wrmsr(BBL_CR_CTL3, bblctl3); + } + + /* Enable L2 in BBL_CR_CTL3 */ + bblctl3.lo |= BBLCR3_L2_ENABLED; + wrmsr(BBL_CR_CTL3, bblctl3); + + /* Turn on cache. Both L1 and L2 are now active. Wahoo! */ +done: + result = 0; + goto out; +bad: + result = -1; +out: + printk(BIOS_INFO, "done.\n"); + return result; +} + Index: src/cpu/intel/slot_1/Makefile.inc =================================================================== --- src/cpu/intel/slot_1/Makefile.inc (revision 6247) +++ src/cpu/intel/slot_1/Makefile.inc (working copy) @@ -19,9 +19,11 @@ ## ramstage-y += slot_1.c +ramstage-y += l2_cache.c subdirs-y += ../model_6xx subdirs-y += ../model_65x subdirs-y += ../model_67x +subdirs-y += ../model_68x subdirs-y += ../model_6bx subdirs-y += ../../x86/tsc subdirs-y += ../../x86/mtrr