Ronald G. Minnich (rminnich@gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/1256
-gerrit
commit c97a0551f05a44f543f0c1da055c7ade791b4ca8 Author: Ronald G. Minnich rminnich@google.com Date: Fri Jul 20 11:13:51 2012 -0700
Make broadcast SIPI optional.
Broadcast SIPI breaks some platforms. This change introduces a new configuration variable, BROADCAST_SIPI, that controls how SIPI is managed. The default is off, since we know that works. You can turn it on, but be aware that even if your platform boots, it may suffer very odd crashes. We need to finish up the broadcast SIPI code, and that will take some work.
This builds but has not been tested on any hardware; the internal Google tree, which is about the same, has been tested and boots on test platforms.
Change-Id: Icf0bd0357353ab7ac4b217939152aa67e8255fa2 Signed-off-by: Ronald G. Minnich rminnich@gmail.com --- src/arch/x86/Kconfig | 27 ++- src/arch/x86/include/arch/cpu.h | 27 ++ src/arch/x86/lib/cpu.c | 40 ++- src/cpu/intel/hyperthreading/intel_sibling.c | 67 +++- src/cpu/intel/model_1067x/model_1067x_init.c | 10 + src/cpu/intel/model_106cx/Kconfig | 1 - src/cpu/intel/model_106cx/model_106cx_init.c | 9 + src/cpu/intel/model_206ax/acpi.c | 16 + src/cpu/intel/model_206ax/model_206ax_init.c | 64 +++ src/cpu/intel/model_6ex/model_6ex_init.c | 5 + src/cpu/intel/model_6fx/model_6fx_init.c | 6 + src/cpu/intel/model_f2x/model_f2x_init.c | 6 + src/cpu/intel/model_f3x/model_f3x_init.c | 7 + src/cpu/intel/model_f4x/model_f4x_init.c | 7 + src/cpu/x86/lapic/lapic_cpu_init.c | 583 ++++++++++++++++++++++++-- src/cpu/x86/lapic/secondary.S | 29 +- src/cpu/x86/pae/pgtbl.c | 11 + src/drivers/i2c/w83795/w83795.c | 9 +- src/include/cpu/cpu.h | 12 +- 19 files changed, 888 insertions(+), 48 deletions(-)
diff --git a/src/arch/x86/Kconfig b/src/arch/x86/Kconfig index 4dfbe70..72e3271 100644 --- a/src/arch/x86/Kconfig +++ b/src/arch/x86/Kconfig @@ -2,7 +2,32 @@ menu "Architecture (x86)"
# This is an SMP option. It relates to starting up APs. # It is usually set in mainboard/*/Kconfig. -# TODO: Improve description. +# If set, startup uses a BROADCAST to start all cores at once. +# It is wrong for some HT cores, e.g. those in chromebooks. +# Because it breaks some platforms, and the old code works on +# all platforms, we leave it off. +config BROADCAST_SIPI + bool + default n + depends on ARCH_X86 + +# This is another SMP option. On some HT platforms the AP +# must not try to halt, but rather must wait in the +# secondary start code for another SIPI so it can stop +# correctly. +config AP_IN_SIPI_WAIT + bool + default n + depends on ARCH_X86 && ! BROADCAST_SIPI + +# Even with BROADCAST_SIPI, we may want to (for testing) force +# serialization. This option supports that. +# It defaults to y as that is known-good. +config SERIAL_CPU_INIT + bool + default y + depends on ARCH_X86 +
# Aligns 16bit entry code in bootblock so that hyper-threading CPUs # can boot AP CPUs to enable their shared caches. diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h index 0fe5ea5..29215fd 100644 --- a/src/arch/x86/include/arch/cpu.h +++ b/src/arch/x86/include/arch/cpu.h @@ -158,6 +158,33 @@ struct cpu_driver { struct device; struct cpu_driver *find_cpu_driver(struct device *cpu);
+#if CONFIG_BROADCAST_SIPI == 0 +struct cpu_info { + device_t cpu; + unsigned int index; +}; + +static inline struct cpu_info *cpu_info(void) +{ + struct cpu_info *ci; + __asm__("andl %%esp,%0; " + "orl %2, %0 " + :"=r" (ci) + : "0" (~(CONFIG_STACK_SIZE - 1)), + "r" (CONFIG_STACK_SIZE - sizeof(struct cpu_info)) + ); + return ci; +} + +static inline unsigned long cpu_index(void) +{ + struct cpu_info *ci; + ci = cpu_info(); + return ci->index; +} + +#endif /* ! CONFIG_BROADCAST_SIPI */ + struct cpuinfo_x86 { uint8_t x86; /* CPU family */ uint8_t x86_vendor; /* CPU vendor */ diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c index 7cd955e..65f8a22 100644 --- a/src/arch/x86/lib/cpu.c +++ b/src/arch/x86/lib/cpu.c @@ -9,7 +9,9 @@ #include <device/path.h> #include <device/device.h> #include <smp/spinlock.h> +#if CONFIG_BROADCAST_SIPI == 1 #include <cpu/x86/lapic.h> +#endif /* CONFIG_BROADCAST_SIPI */
/* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(uint32_t flag) @@ -235,7 +237,15 @@ static void set_cpu_ops(struct device *cpu) cpu->ops = driver ? driver->ops : NULL; }
+#if CONFIG_BROADCAST_SIPI == 0 +void cpu_initialize(unsigned int index) +#else /* CONFIG_BROADCAST_SIPI */ +#if CONFIG_SMP +static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED; +#endif + void cpu_initialize(struct bus *cpu_bus, int index) +#endif /* CONFIG_BROADCAST_SIPI */ { /* Because we busy wait at the printk spinlock. * It is important to keep the number of printed messages @@ -244,16 +254,41 @@ void cpu_initialize(struct bus *cpu_bus, int index) */ struct device *cpu; struct cpuinfo_x86 c; - struct device_path cpu_path; unsigned char id = lapicid(); +#if CONFIG_BROADCAST_SIPI == 1 + struct device_path cpu_path;
cpu_path.type = DEVICE_PATH_APIC; cpu_path.apic.apic_id = id; + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = id; + cpu_path.apic.index = index;
+#if CONFIG_SMP + spin_lock(&start_cpu_lock); +#endif cpu = alloc_find_dev(cpu_bus, &cpu_path); +#if CONFIG_SMP + spin_unlock(&start_cpu_lock); +#endif + printk(BIOS_DEBUG, "Initializing CPU #%d\n", id); +#else /* ! CONFIG_BROADCAST_SIPI */ + struct cpu_info *info; + info = cpu_info(); + + cpu = info->cpu; + if (!cpu) { + die("CPU: missing cpu device structure"); + } + cpu->path.apic.index = index;
printk(BIOS_DEBUG, "Initializing CPU #%d\n", id); + cpu = info->cpu; + if (!cpu) { + die("CPU: missing cpu device structure"); + } +#endif /* CONFIG_BROADCAST_SIPI */
/* Find what type of cpu we are dealing with */ identify_cpu(cpu); @@ -284,8 +319,7 @@ void cpu_initialize(struct bus *cpu_bus, int index) cpu->ops->init(cpu); }
- printk(BIOS_INFO, "CPU #%d initialized\n", id); + printk(BIOS_INFO, "CPU #%d ,lapic id %d, initialized\n", index, id);
- return; }
diff --git a/src/cpu/intel/hyperthreading/intel_sibling.c b/src/cpu/intel/hyperthreading/intel_sibling.c index 8377cd0..dea34d3 100644 --- a/src/cpu/intel/hyperthreading/intel_sibling.c +++ b/src/cpu/intel/hyperthreading/intel_sibling.c @@ -7,13 +7,76 @@ #include <smp/spinlock.h> #include <assert.h>
+#if CONFIG_BROADCAST_SIPI == 0 +#if !CONFIG_SERIAL_CPU_INIT +#error Intel hyper-threading requires serialized cpu init +#endif + +static int first_time = 1; +static int disable_siblings = !CONFIG_LOGICAL_CPUS; + +void intel_sibling_init(device_t cpu) +{ + unsigned i, siblings; + struct cpuid_result result; + + /* On the bootstrap processor see if I want sibling cpus enabled */ + if (first_time) { + first_time = 0; + get_option(&disable_siblings, "hyper_threading"); + } + result = cpuid(1); + /* Is hyperthreading supported */ + if (!(result.edx & (1 << 28))) { + return; + } + /* See how many sibling cpus we have */ + siblings = (result.ebx >> 16) & 0xff; + if (siblings < 1) { + siblings = 1; + } + + printk(BIOS_DEBUG, "CPU: %u %d siblings\n", + cpu->path.apic.apic_id, + siblings); + + /* See if I am a sibling cpu */ + if (cpu->path.apic.apic_id & (siblings -1)) { + if (disable_siblings) { + cpu->enabled = 0; + } + return; + } + /* I am the primary cpu start up my siblings */ + for(i = 1; i < siblings; i++) { + struct device_path cpu_path; + device_t new; + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = cpu->path.apic.apic_id + i; + + /* Allocate new cpu device structure iff sibling CPU + * was not in static device tree. + */ + new = alloc_find_dev(cpu->bus, &cpu_path); + + if (!new) { + continue; + } + + printk(BIOS_DEBUG, "CPU: %u has sibling %u\n", + cpu->path.apic.apic_id, + new->path.apic.apic_id); + } +} + +#else /* CONFIG_BROADCAST_SIPI */ /* Return true if running thread does not have the smallest lapic ID * within a CPU core. 
*/ int intel_ht_sibling(void) { unsigned int core_ids, apic_ids, threads; - apic_ids = 1; if (cpuid_eax(0) >= 1) apic_ids = (cpuid_ebx(1) >> 16) & 0xff; @@ -27,3 +90,5 @@ int intel_ht_sibling(void) threads = (apic_ids / core_ids); return !!(lapicid() & (threads-1)); } +#endif /* CONFIG_BROADCAST_SIPI */ + diff --git a/src/cpu/intel/model_1067x/model_1067x_init.c b/src/cpu/intel/model_1067x/model_1067x_init.c index ddd1381..4a91b04 100644 --- a/src/cpu/intel/model_1067x/model_1067x_init.c +++ b/src/cpu/intel/model_1067x/model_1067x_init.c @@ -29,6 +29,10 @@ #include <cpu/x86/lapic.h> #include <cpu/intel/microcode.h> #include <cpu/intel/speedstep.h> + +#if CONFIG_BROADCAST_SIPI == 0 +#include <cpu/intel/hyperthreading.h> +#endif /* ! CONFIG_BROADCAST_SIPI */ #include <cpu/x86/cache.h> #include <cpu/x86/name.h>
@@ -220,6 +224,12 @@ static void model_1067x_init(device_t cpu)
/* PIC thermal sensor control */ configure_pic_thermal_sensors(); +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ + }
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_106cx/Kconfig b/src/cpu/intel/model_106cx/Kconfig index 2ef7392..00abf6b 100644 --- a/src/cpu/intel/model_106cx/Kconfig +++ b/src/cpu/intel/model_106cx/Kconfig @@ -11,4 +11,3 @@ config CPU_ADDR_BITS default 32
endif - diff --git a/src/cpu/intel/model_106cx/model_106cx_init.c b/src/cpu/intel/model_106cx/model_106cx_init.c index 8d2ef3d..16660ac 100644 --- a/src/cpu/intel/model_106cx/model_106cx_init.c +++ b/src/cpu/intel/model_106cx/model_106cx_init.c @@ -27,6 +27,10 @@ #include <cpu/x86/lapic.h> #include <cpu/intel/microcode.h> #include <cpu/intel/speedstep.h> + +#if CONFIG_BROADCAST_SIPI == 0 +#include <cpu/intel/hyperthreading.h> +#endif /* ! CONFIG_BROADCAST_SIPI */ #include <cpu/x86/cache.h> #include <cpu/x86/name.h> #include <usbdebug.h> @@ -177,6 +181,11 @@ static void model_106cx_init(device_t cpu) configure_misc();
/* TODO: PIC thermal sensor control */ +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ }
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_206ax/acpi.c b/src/cpu/intel/model_206ax/acpi.c index dea23e7..1b5fa0a 100644 --- a/src/cpu/intel/model_206ax/acpi.c +++ b/src/cpu/intel/model_206ax/acpi.c @@ -26,7 +26,9 @@ #include <arch/acpigen.h> #include <arch/cpu.h> #include <cpu/x86/msr.h> +#if CONFIG_BROADCAST_SIPI == 1 #include <cpu/x86/lapic.h> +#endif /* CONFIG_BROADCAST_SIPI */ #include <cpu/intel/acpi.h> #include <cpu/intel/speedstep.h> #include <cpu/intel/turbo.h> @@ -89,8 +91,14 @@ static int generate_cstate_entries(acpi_cstate_t *cstates,
static int generate_C_state_entries(void) { +#if CONFIG_BROADCAST_SIPI == 0 + struct cpu_info *info; +#endif /* ! CONFIG_BROADCAST_SIPI */ + struct cpu_driver *cpu; +#if CONFIG_BROADCAST_SIPI == 1 struct cpu_driver *cpu; struct device *cpu_dev; +#endif /* CONFIG_BROADCAST_SIPI */ int len, lenif; device_t lapic; struct cpu_intel_model_206ax_config *conf = NULL; @@ -104,10 +112,18 @@ static int generate_C_state_entries(void) return 0;
/* Find CPU map of supported C-states */ +#if CONFIG_BROADCAST_SIPI == 0 + info = cpu_info(); + if (!info) + return 0; + cpu = find_cpu_driver(info->cpu); +#else /* CONFIG_BROADCAST_SIPI */ cpu_dev = dev_find_lapic(lapicid()); if (!cpu_dev) return 0; cpu = find_cpu_driver(cpu_dev); +#endif /* CONFIG_BROADCAST_SIPI */ + if (!cpu || !cpu->cstates) return 0;
diff --git a/src/cpu/intel/model_206ax/model_206ax_init.c b/src/cpu/intel/model_206ax/model_206ax_init.c index d60c237..6e76d7f 100644 --- a/src/cpu/intel/model_206ax/model_206ax_init.c +++ b/src/cpu/intel/model_206ax/model_206ax_init.c @@ -414,6 +414,63 @@ static void configure_mca(void) static unsigned ehci_debug_addr; #endif
+#if CONFIG_BROADCAST_SIPI == 0 +/* + * Initialize any extra cores/threads in this package. + */ +static void intel_cores_init(device_t cpu) +{ + struct cpuid_result result; + unsigned cores, threads, i; + + result = cpuid_ext(0xb, 0); /* Threads per core */ + threads = result.ebx & 0xff; + + result = cpuid_ext(0xb, 1); /* Cores per package */ + cores = result.ebx & 0xff; + + /* Only initialize extra cores from BSP */ + if (cpu->path.apic.apic_id) + return; + + printk(BIOS_DEBUG, "CPU: %u has %u cores %u threads\n", + cpu->path.apic.apic_id, cores, threads); + + for (i = 1; i < cores; ++i) { + struct device_path cpu_path; + device_t new; + + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = + cpu->path.apic.apic_id + i; + + /* Update APIC ID if no hyperthreading */ + if (threads == 1) + cpu_path.apic.apic_id <<= 1; + + /* Allocate the new cpu device structure */ + new = alloc_dev(cpu->bus, &cpu_path); + if (!new) + continue; + + printk(BIOS_DEBUG, "CPU: %u has core %u\n", + cpu->path.apic.apic_id, + new->path.apic.apic_id); + +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 + /* Start the new cpu */ + if (!start_cpu(new)) { + /* Record the error in cpu? */ + printk(BIOS_ERR, "CPU %u would not start!\n", + new->path.apic.apic_id); + } +#endif + } +} + +#endif /* ! CONFIG_BROADCAST_SIPI */ + static void model_206ax_init(device_t cpu) { char processor_name[49]; @@ -476,6 +533,13 @@ static void model_206ax_init(device_t cpu)
/* Enable Turbo */ enable_turbo(); + +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up extra cores */ + intel_cores_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ + }
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_6ex/model_6ex_init.c b/src/cpu/intel/model_6ex/model_6ex_init.c index a0afd2e..989ca27 100644 --- a/src/cpu/intel/model_6ex/model_6ex_init.c +++ b/src/cpu/intel/model_6ex/model_6ex_init.c @@ -205,6 +205,11 @@ static void model_6ex_init(device_t cpu)
/* PIC thermal sensor control */ configure_pic_thermal_sensors(); +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ }
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_6fx/model_6fx_init.c b/src/cpu/intel/model_6fx/model_6fx_init.c index c5d7a6b..171059a 100644 --- a/src/cpu/intel/model_6fx/model_6fx_init.c +++ b/src/cpu/intel/model_6fx/model_6fx_init.c @@ -243,6 +243,12 @@ static void model_6fx_init(device_t cpu)
/* PIC thermal sensor control */ configure_pic_thermal_sensors(); + +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ }
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_f2x/model_f2x_init.c b/src/cpu/intel/model_f2x/model_f2x_init.c index fa9e05f..87c3157 100644 --- a/src/cpu/intel/model_f2x/model_f2x_init.c +++ b/src/cpu/intel/model_f2x/model_f2x_init.c @@ -60,6 +60,12 @@ static void model_f2x_init(device_t cpu)
/* Enable the local cpu apics */ setup_lapic(); + +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ };
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_f3x/model_f3x_init.c b/src/cpu/intel/model_f3x/model_f3x_init.c index dd2a45f..96c38cd 100644 --- a/src/cpu/intel/model_f3x/model_f3x_init.c +++ b/src/cpu/intel/model_f3x/model_f3x_init.c @@ -43,6 +43,13 @@ static void model_f3x_init(device_t cpu)
/* Enable the local cpu apics */ setup_lapic(); + +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ + };
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/model_f4x/model_f4x_init.c b/src/cpu/intel/model_f4x/model_f4x_init.c index af7d9d2..cade95f 100644 --- a/src/cpu/intel/model_f4x/model_f4x_init.c +++ b/src/cpu/intel/model_f4x/model_f4x_init.c @@ -51,6 +51,13 @@ static void model_f4x_init(device_t cpu)
/* Enable the local cpu apics */ setup_lapic(); + +#if CONFIG_BROADCAST_SIPI == 0 + + /* Start up my cpu siblings */ + intel_sibling_init(cpu); +#endif /* ! CONFIG_BROADCAST_SIPI */ + };
static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c index b6dc560..b104b67 100644 --- a/src/cpu/x86/lapic/lapic_cpu_init.c +++ b/src/cpu/x86/lapic/lapic_cpu_init.c @@ -16,7 +16,6 @@ #include <cpu/cpu.h> #include <cpu/intel/speedstep.h>
-#if CONFIG_SMP /* This is a lot more paranoid now, since Linux can NOT handle * being told there is a CPU when none exists. So any errors * will return 0, meaning no CPU. @@ -34,9 +33,10 @@ char *lowmem_backup_ptr; int lowmem_backup_size; #endif
+#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 extern char _secondary_start[];
-static void copy_secondary_start_to_1m_below(void) +static void copy_secondary_start_to_lowest_1M(void) { extern char _secondary_start_end[]; unsigned long code_size; @@ -59,13 +59,528 @@ static void copy_secondary_start_to_1m_below(void)
printk(BIOS_DEBUG, "start_eip=0x%08lx, code_size=0x%08lx\n", (long unsigned int)AP_SIPI_VECTOR, code_size); } +#endif /* CONFIG_SMP && CONFIG_MAX_CPUS > 1 */ + +#ifdef __SSE3__ +static __inline__ __attribute__((always_inline)) unsigned long readcr4(void) +{ + unsigned long value; + __asm__ __volatile__ ( + "mov %%cr4, %[value]" + : [value] "=a" (value)); + return value; +} + +static __inline__ __attribute__((always_inline)) void + writecr4(unsigned long Data) +{ + __asm__ __volatile__ ( + "mov %%eax, %%cr4" + : + : "a" (Data) + ); +} +#endif + +#if CONFIG_BROADCAST_SIPI == 0 + +static unsigned long get_valid_start_eip(unsigned long orig_start_eip) +{ + // 16 bit to avoid 0xa0000 + return (unsigned long)orig_start_eip & 0xffff; +} + +static int lapic_start_cpu(unsigned long apicid) +{ + int timeout; + unsigned long send_status, accept_status, start_eip; + int j, num_starts, maxlvt; + + /* + * Starting actual IPI sequence... + */ + + printk(BIOS_SPEW, "Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* + * Send IPI + */ + + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_INT_ASSERT + | LAPIC_DM_INIT); + + printk(BIOS_SPEW, "Waiting for send to finish...\n"); + timeout = 0; + do { + printk(BIOS_SPEW, "+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + if (timeout >= 1000) { + printk(BIOS_ERR, "CPU %ld: First APIC write timed out. " + "Disabling\n", apicid); + // too bad. 
+ printk(BIOS_ERR, "ESR is 0x%lx\n", lapic_read(LAPIC_ESR)); + if (lapic_read(LAPIC_ESR)) { + printk(BIOS_ERR, "Try to reset ESR\n"); + lapic_write_around(LAPIC_ESR, 0); + printk(BIOS_ERR, "ESR is 0x%lx\n", + lapic_read(LAPIC_ESR)); + } + return 0; + } +#if !CONFIG_CPU_AMD_MODEL_10XXX && !CONFIG_CPU_INTEL_MODEL_206AX + mdelay(10); +#endif + + printk(BIOS_SPEW, "Deasserting INIT.\n"); + + /* Target chip */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* Send IPI */ + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT); + + printk(BIOS_SPEW, "Waiting for send to finish...\n"); + timeout = 0; + do { + printk(BIOS_SPEW, "+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + if (timeout >= 1000) { + printk(BIOS_ERR, "CPU %ld: Second apic write timed out. " + "Disabling\n", apicid); + // too bad. + return 0; + } + + start_eip = get_valid_start_eip((unsigned long)_secondary_start); + +#if !CONFIG_CPU_AMD_MODEL_10XXX + num_starts = 2; +#else + num_starts = 1; +#endif + + /* + * Run STARTUP IPI loop. + */ + printk(BIOS_SPEW, "#startup loops: %d.\n", num_starts); + + maxlvt = 4; + + for (j = 1; j <= num_starts; j++) { + printk(BIOS_SPEW, "Sending STARTUP #%d to %lu.\n", j, apicid); + lapic_read_around(LAPIC_SPIV); + lapic_write(LAPIC_ESR, 0); + lapic_read(LAPIC_ESR); + printk(BIOS_SPEW, "After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* Boot on the stack */ + /* Kick the second */ + lapic_write_around(LAPIC_ICR, LAPIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. 
+ */ + udelay(300); + + printk(BIOS_SPEW, "Startup point 1.\n"); + + printk(BIOS_SPEW, "Waiting for send to finish...\n"); + timeout = 0; + do { + printk(BIOS_SPEW, "+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + lapic_read_around(LAPIC_SPIV); + lapic_write(LAPIC_ESR, 0); + } + accept_status = (lapic_read(LAPIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + printk(BIOS_SPEW, "After Startup.\n"); + if (send_status) + printk(BIOS_WARNING, "APIC never delivered???\n"); + if (accept_status) + printk(BIOS_WARNING, "APIC delivery error (%lx).\n", + accept_status); + if (send_status || accept_status) + return 0; + return 1; +} + +/* Number of cpus that are currently running in coreboot */ +static atomic_t active_cpus = ATOMIC_INIT(1); + +/* start_cpu_lock covers last_cpu_index and secondary_stack. + * Only starting one cpu at a time let's me remove the logic + * for select the stack from assembly language. + * + * In addition communicating by variables to the cpu I + * am starting allows me to verify it has started before + * start_cpu returns. 
+ */ + +static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED; +static unsigned int last_cpu_index = 0; +static void *stacks[CONFIG_MAX_CPUS]; +volatile unsigned long secondary_stack; +volatile unsigned int secondary_cpu_index; + +int start_cpu(device_t cpu) +{ + extern unsigned char _estack[]; + struct cpu_info *info; + unsigned long stack_end; + unsigned long stack_base; + unsigned long *stack; + unsigned long apicid; + unsigned int index; + unsigned long count; + int i; + int result; + + spin_lock(&start_cpu_lock); + + /* Get the CPU's apicid */ + apicid = cpu->path.apic.apic_id; + + /* Get an index for the new processor */ + index = ++last_cpu_index; + + /* Find end of the new processor's stack */ + stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - + sizeof(struct cpu_info); + + stack_base = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*(index+1)); + printk(BIOS_SPEW, "CPU%d: stack_base %p, stack_end %p\n", index, + (void *)stack_base, (void *)stack_end); + /* poison the stack */ + for(stack = (void *)stack_base, i = 0; i < CONFIG_STACK_SIZE; i++) + stack[i/sizeof(*stack)] = 0xDEADBEEF; + stacks[index] = stack; + /* Record the index and which CPU structure we are using */ + info = (struct cpu_info *)stack_end; + info->index = index; + info->cpu = cpu; + + /* Advertise the new stack and index to start_cpu */ + secondary_stack = stack_end; + secondary_cpu_index = index; + + /* Until the CPU starts up report the CPU is not enabled */ + cpu->enabled = 0; + cpu->initialized = 0; + + /* Start the cpu */ + result = lapic_start_cpu(apicid); + + if (result) { + result = 0; + /* Wait 1s or until the new cpu calls in */ + for(count = 0; count < 100000 ; count++) { + if (secondary_stack == 0) { + result = 1; + break; + } + udelay(10); + } + } + secondary_stack = 0; + spin_unlock(&start_cpu_lock); + return result; +} + +#if CONFIG_AP_IN_SIPI_WAIT + +/** + * Sending INIT IPI to self is equivalent of asserting #INIT with a bit of + * delay. 
+ * An undefined number of instruction cycles will complete. All global locks + * must be released before INIT IPI and no printk is allowed after this. + * De-asserting INIT IPI is a no-op on later Intel CPUs. + * + * If you set DEBUG_HALT_SELF to 1, printk's after INIT IPI are enabled + * but running thread may halt without releasing the lock and effectively + * deadlock other CPUs. + */ +#define DEBUG_HALT_SELF 0 + +/** + * Normally this function is defined in lapic.h as an always inline function + * that just keeps the CPU in a hlt() loop. This does not work on all CPUs. + * I think all hyperthreading CPUs might need this version, but I could only + * verify this on the Intel Core Duo + */ +void stop_this_cpu(void) +{ + int timeout; + unsigned long send_status; + unsigned long id; + + id = lapic_read(LAPIC_ID) >> 24; + + printk(BIOS_DEBUG, "CPU %ld going down...\n", id); + + /* send an LAPIC INIT to myself */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(id)); + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | + LAPIC_INT_ASSERT | LAPIC_DM_INIT); + + /* wait for the ipi send to finish */ +#if DEBUG_HALT_SELF + printk(BIOS_SPEW, "Waiting for send to finish...\n"); +#endif + timeout = 0; + do { +#if DEBUG_HALT_SELF + printk(BIOS_SPEW, "+"); +#endif + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + if (timeout >= 1000) { +#if DEBUG_HALT_SELF + printk(BIOS_ERR, "timed out\n"); +#endif + } + mdelay(10); + +#if DEBUG_HALT_SELF + printk(BIOS_SPEW, "Deasserting INIT.\n"); +#endif + /* Deassert the LAPIC INIT */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(id)); + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT); + +#if DEBUG_HALT_SELF + printk(BIOS_SPEW, "Waiting for send to finish...\n"); +#endif + timeout = 0; + do { +#if DEBUG_HALT_SELF + printk(BIOS_SPEW, "+"); +#endif + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while 
(send_status && (timeout++ < 1000)); + if (timeout >= 1000) { +#if DEBUG_HALT_SELF + printk(BIOS_ERR, "timed out\n"); +#endif + } + + while(1) { + hlt(); + } +} +#endif + + +/* C entry point of secondary cpus */ +void secondary_cpu_init(unsigned int index) +{ + atomic_inc(&active_cpus); +#if CONFIG_SERIAL_CPU_INIT + spin_lock(&start_cpu_lock); +#endif + +#ifdef __SSE3__ + /* + * Seems that CR4 was cleared when AP start via lapic_start_cpu() + * Turn on CR4.OSFXSR and CR4.OSXMMEXCPT when SSE options enabled + */ + u32 cr4_val; + cr4_val = readcr4(); + cr4_val |= (1 << 9 | 1 << 10); + writecr4(cr4_val); +#endif + cpu_initialize(index); +#if CONFIG_SERIAL_CPU_INIT + spin_unlock(&start_cpu_lock); +#endif + + atomic_dec(&active_cpus); + + stop_this_cpu(); +} + +static void start_other_cpus(struct bus *cpu_bus, device_t bsp_cpu) +{ + device_t cpu; + /* Loop through the cpus once getting them started */ + + for(cpu = cpu_bus->children; cpu ; cpu = cpu->sibling) { + if (cpu->path.type != DEVICE_PATH_APIC) { + continue; + } + #if !CONFIG_SERIAL_CPU_INIT + if(cpu==bsp_cpu) { + continue; + } + #endif + + if (!cpu->enabled) { + continue; + } + + if (cpu->initialized) { + continue; + } + + if (!start_cpu(cpu)) { + /* Record the error in cpu? 
*/ + printk(BIOS_ERR, "CPU 0x%02x would not start!\n", + cpu->path.apic.apic_id); + } +#if CONFIG_SERIAL_CPU_INIT + udelay(10); +#endif + } + +} + +static void wait_other_cpus_stop(struct bus *cpu_bus) +{ + device_t cpu; + int old_active_count, active_count; + long loopcount = 0; + int i; + + /* Now loop until the other cpus have finished initializing */ + old_active_count = 1; + active_count = atomic_read(&active_cpus); + while(active_count > 1) { + if (active_count != old_active_count) { + printk(BIOS_INFO, "Waiting for %d CPUS to stop\n", + active_count - 1); + old_active_count = active_count; + } + udelay(10); + active_count = atomic_read(&active_cpus); + loopcount++; + } + for(cpu = cpu_bus->children; cpu; cpu = cpu->sibling) { + if (cpu->path.type != DEVICE_PATH_APIC) { + continue; + } + if (cpu->path.apic.apic_id == SPEEDSTEP_APIC_MAGIC) { + continue; + } + if (!cpu->initialized) { + printk(BIOS_ERR, "CPU 0x%02x did not initialize!\n", + cpu->path.apic.apic_id); + } + } + printk(BIOS_DEBUG, "All AP CPUs stopped (%ld loops)\n", loopcount); + for(i = 1; i <= last_cpu_index; i++){ + unsigned long *stack = stacks[i]; + int lowest; + int maxstack = (CONFIG_STACK_SIZE - sizeof(struct cpu_info)) + /sizeof(*stack) - 1; + if (stack[0] != 0xDEADBEEF) + printk(BIOS_ERR, "CPU%d overran its stack\n", i); + for(lowest = 0; lowest < maxstack; lowest++) + if (stack[lowest] != 0xDEADBEEF) + break; + printk(BIOS_SPEW, "CPU%d: stack allocated from %p to %p:", i, + stack, &stack[maxstack]); + printk(BIOS_SPEW, "lowest stack address was %p\n", + &stack[lowest]); + } +} + +void initialize_cpus(struct bus *cpu_bus) +{ + struct device_path cpu_path; + struct cpu_info *info; + + /* Find the info struct for this cpu */ + info = cpu_info(); + +#if NEED_LAPIC == 1 + /* Ensure the local apic is enabled */ + enable_lapic(); + + /* Get the device path of the boot cpu */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = lapicid(); +#else + /* Get the device path of the boot cpu 
*/ + cpu_path.type = DEVICE_PATH_CPU; + cpu_path.cpu.id = 0; +#endif + + /* Find the device structure for the boot cpu */ + info->cpu = alloc_find_dev(cpu_bus, &cpu_path); + +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 + // why here? In case some day we can start core1 in amd_sibling_init + copy_secondary_start_to_lowest_1M(); +#endif + +#if CONFIG_HAVE_SMI_HANDLER + smm_init(); +#endif + + cpus_ready_for_init(); + +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 + #if !CONFIG_SERIAL_CPU_INIT + /* start all aps at first, so we can init ECC all together */ + start_other_cpus(cpu_bus, info->cpu); + #endif +#endif + + /* Initialize the bootstrap processor */ + cpu_initialize(0); + +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1 + #if CONFIG_SERIAL_CPU_INIT + start_other_cpus(cpu_bus, info->cpu); + #endif + + /* Now wait the rest of the cpus stop*/ + wait_other_cpus_stop(cpu_bus); +#endif +} + +#else /* CONFIG_BROADCAST_SIPI == 1*/ + +#if CONFIG_SMP && CONFIG_MAX_CPUS > 1
static struct bus *current_cpu_bus;
static int lapic_start_cpus(struct bus *cpu_bus) { int timeout; - unsigned long send_status, accept_status; + unsigned long send_status, accept_status, start_eip; int maxlvt;
/* @@ -99,6 +614,7 @@ static int lapic_start_cpus(struct bus *cpu_bus) } return 0; } + start_eip = get_valid_start_eip((unsigned long)_secondary_start);
maxlvt = 4;
@@ -114,11 +630,15 @@ static int lapic_start_cpus(struct bus *cpu_bus)
/* Target chip */ lapic_write_around(LAPIC_ICR2, 0); + printk(BIOS_SPEW, "After apic_write_around(LAPIC_ICR2, 0);\n");
/* Boot on the stack */ /* Kick the second */ - lapic_write_around(LAPIC_ICR, LAPIC_INT_ASSERT | LAPIC_DM_STARTUP | LAPIC_DEST_ALLBUT - | ((AP_SIPI_VECTOR >> 12) & 0xff)); + lapic_write_around(LAPIC_ICR, + LAPIC_INT_ASSERT | LAPIC_DM_STARTUP | LAPIC_DEST_ALLBUT + | (start_eip >> 12)); + printk(BIOS_SPEW, "After apic_write_around allbut starteip %p.\n", + (void *)start_eip);
/* * Give the other CPU some time to accept the IPI. @@ -152,7 +672,8 @@ static int lapic_start_cpus(struct bus *cpu_bus) if (send_status) printk(BIOS_WARNING, "APIC never delivered???\n"); if (accept_status) - printk(BIOS_WARNING, "APIC delivery error (%lx).\n", accept_status); + printk(BIOS_WARNING, "APIC delivery error (%lx).\n", + accept_status); if (send_status || accept_status) return 0; return 1; @@ -171,7 +692,8 @@ static void stop_all_ap_cpus(void) int timeout; /* send an LAPIC INIT to all but myself */ lapic_write_around(LAPIC_ICR2, 0); - lapic_write_around(LAPIC_ICR, LAPIC_INT_ASSERT | LAPIC_DM_INIT | LAPIC_DEST_ALLBUT); + lapic_write_around(LAPIC_ICR, + LAPIC_INT_ASSERT | LAPIC_DM_INIT | LAPIC_DEST_ALLBUT);
/* wait for the ipi send to finish */ printk(BIOS_SPEW, "Waiting for send to finish...\n"); @@ -187,26 +709,6 @@ static void stop_all_ap_cpus(void) mdelay(10); }
-#ifdef __SSE3__ -static __inline__ __attribute__((always_inline)) unsigned long readcr4(void) -{ - unsigned long value; - __asm__ __volatile__ ( - "mov %%cr4, %[value]" - : [value] "=a" (value)); - return value; -} - -static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Data) -{ - __asm__ __volatile__ ( - "mov %%eax, %%cr4" - : - : "a" (Data) - ); -} -#endif - /* C entry point of secondary cpus */ void secondary_cpu_init(int index) { @@ -236,7 +738,8 @@ static void wait_other_cpus_stop(struct bus *cpu_bus) active_count = atomic_read(&active_cpus); while(active_count > 1) { if (active_count != old_active_count) { - printk(BIOS_INFO, "Waiting for %d CPUS to stop\n", active_count - 1); + printk(BIOS_INFO, "Waiting for %d CPUS to stop\n", + active_count - 1); old_active_count = active_count; } udelay(10); @@ -257,9 +760,26 @@ static void wait_other_cpus_stop(struct bus *cpu_bus) } stop_all_ap_cpus(); printk(BIOS_DEBUG, "All AP CPUs stopped (%ld loops)\n", loopcount); +#if 0 + for(i = 1; i <= last_cpu_index; i++){ + unsigned long *stack = stacks[i]; + int lowest; + int maxstack = (CONFIG_STACK_SIZE - sizeof(struct cpu_info)) + /sizeof(*stack) - 1; + if (stack[0] != 0xDEADBEEF) + printk(BIOS_ERR, "CPU%d overran its stack\n", i); + for(lowest = 0; lowest < maxstack; lowest++) + if (stack[lowest] != 0xDEADBEEF) + break; + printk(BIOS_SPEW, "CPU%d: stack allocated from %p to %p:", i, + stack, &stack[maxstack]); + printk(BIOS_SPEW, "lowest stack address was %p\n", + &stack[lowest]); + } +#endif }
-#endif /* CONFIG_SMP */ +#endif /* CONFIG_SMP and MAX_CPUS > 1 */
void initialize_cpus(struct bus *cpu_bus) { @@ -279,7 +799,8 @@ void initialize_cpus(struct bus *cpu_bus) #endif
#if CONFIG_SMP - copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init + // why here? In case some day we can start core1 in amd_sibling_init + copy_secondary_start_to_lowest_1M(); #endif
#if CONFIG_HAVE_SMI_HANDLER @@ -297,4 +818,4 @@ void initialize_cpus(struct bus *cpu_bus) wait_other_cpus_stop(cpu_bus); #endif } - +#endif /* CONFIG_BROADCAST_SIPI == 1 */ diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S index e6650ec..a6d5ea3 100644 --- a/src/cpu/x86/lapic/secondary.S +++ b/src/cpu/x86/lapic/secondary.S @@ -2,7 +2,11 @@ #include <cpu/x86/lapic_def.h>
.text - .globl _secondary_start, _secondary_start_end, cpucount, ap_protected_start + .globl _secondary_start, _secondary_start_end, ap_protected_start +#if CONFIG_BROADCAST_SIPI == 1 + .globl cpucount +#endif + .balign 4096 _secondary_start: .code16 cli @@ -26,12 +30,6 @@ _secondary_start:
ljmpl $0x10, $__ap_protected_start
-gdtaddr: - .word gdt_limit /* the table limit */ - .long gdt /* we know the offset */ - -_secondary_start_end: - ap_protected_start: .code32 lgdt gdtaddr @@ -49,6 +47,14 @@ __ap_protected_start: /* Load the Interrupt descriptor table */ lidt idtarg
+#if CONFIG_BROADCAST_SIPI == 0 + /* Set the stack pointer, and flag that we are done */ + xorl %eax, %eax + movl secondary_stack, %esp + movl secondary_cpu_index, %edi + pushl %edi + movl %eax, secondary_stack +#else /* CONFIG_BROADCAST_SIPI */ /* increment our cpu index */ movl $1, %eax lock xadd %eax, cpucount @@ -61,10 +67,19 @@ __ap_protected_start: add %eax, %esp
pushl %ecx +#endif /* CONFIG_BROADCAST_SIPI */ + call secondary_cpu_init 1: hlt jmp 1b
+#if CONFIG_BROADCAST_SIPI == 1 cpucount: .long 1 +#endif /* CONFIG_BROADCAST_SIPI */ +gdtaddr: + .word gdt_limit /* the table limit */ + .long gdt /* we know the offset */ + +_secondary_start_end: .code32 diff --git a/src/cpu/x86/pae/pgtbl.c b/src/cpu/x86/pae/pgtbl.c index 7aa17c2..f84ec6e 100644 --- a/src/cpu/x86/pae/pgtbl.c +++ b/src/cpu/x86/pae/pgtbl.c @@ -7,6 +7,15 @@ #include <cpu/cpu.h> #include <cpu/x86/pae.h> #include <cpu/x86/lapic.h> + +#if CONFIG_BROADCAST_SIPI == 1 +#include <device/device.h> +#endif /* CONFIG_BROADCAST_SIPI */ +#include <cpu/cpu.h> +#include <cpu/x86/pae.h> +#if CONFIG_BROADCAST_SIPI == 1 +#include <cpu/x86/lapic.h> +#endif /* CONFIG_BROADCAST_SIPI */ #include <string.h>
static void paging_off(void) @@ -45,6 +54,7 @@ static void paging_on(void *pdp) ); }
+#if CONFIG_BROADCAST_SIPI == 1 static int cpu_index(void) { device_t dev = dev_find_lapic(lapicid()); @@ -53,6 +63,7 @@ static int cpu_index(void) return dev->path.apic.index; }
+#endif /* CONFIG_BROADCAST_SIPI */ void *map_2M_page(unsigned long page) { struct pde { diff --git a/src/drivers/i2c/w83795/w83795.c b/src/drivers/i2c/w83795/w83795.c index 12be4da..14b12b3 100644 --- a/src/drivers/i2c/w83795/w83795.c +++ b/src/drivers/i2c/w83795/w83795.c @@ -22,7 +22,9 @@ #include <console/console.h> #include <device/device.h> #include "southbridge/amd/cimx/sb700/smbus.h" /*SMBUS_IO_BASE*/ +#if CONFIG_BROADCAST_SIPI == 1 #include <cpu/x86/lapic.h> +#endif /* CONFIG_BROADCAST_SIPI */ #include "w83795.h"
static u32 w83795_set_bank(u8 bank) @@ -225,8 +227,13 @@ static void w83795_init(w83795_fan_mode_t mode, u8 dts_src) static void w83795_hwm_init(device_t dev) { struct device *cpu; - +#if CONFIG_BROADCAST_SIPI == 0 + struct cpu_info *info; + info = cpu_info(); + cpu = info->cpu; +#else /* CONFIG_BROADCAST_SIPI */ cpu = dev_find_lapic(lapicid()); +#endif /* CONFIG_BROADCAST_SIPI */ if (!cpu) die("CPU: missing cpu device structure");
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h index 9765dfd..0bf85d6 100644 --- a/src/include/cpu/cpu.h +++ b/src/include/cpu/cpu.h @@ -4,12 +4,18 @@ #include <arch/cpu.h>
#if !defined(__ROMCC__) + +#if CONFIG_BROADCAST_SIPI == 0 +void secondary_cpu_init(unsigned int cpu_index)__attribute__((regparm(0))); +void cpu_initialize(unsigned int cpu_index); +#else /* CONFIG_BROADCAST_SIPI */ +void secondary_cpu_init(int index); +extern unsigned int cpucount; void cpu_initialize(struct bus *cpu_bus, int index); +#endif /* CONFIG_BROADCAST_SIPI */ + struct bus; void initialize_cpus(struct bus *cpu_bus); -void secondary_cpu_init(int index); - -extern unsigned int cpucount;
#if !CONFIG_WAIT_BEFORE_CPUS_INIT #define cpus_ready_for_init() do {} while(0)