Ronald G. Minnich (rminnich@gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit f10e25754cc2650417500d629aa788a6e3384707 Author: Ron Minnich rminnich@gmail.com Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot. To give people a chance to examine the structure, and to make bisecting for bugs easier, we are implementing the patch in 3 stages. This first stage introduces the basic mechanism but does not make any visible change in behavior. The APs, instead of being immediately spun down, are asked to run one debug print function and are then spun down.
The means by which APs are tasked was implemented in the NIX operating system in 2011, see: http://code.google.com/p/nix-os/ The APs come alive and spin on a memory location, contained in a per-AP structure. Tasking is accomplished by setting parameters into an argument array and then writing a function pointer into the memory location. The AP indicates completion by writing zero to the memory location. The BSP can wait for the AP to finish (synchronous) or go off to do other work (asynchronous). Wait times are in units of microseconds.
This way of tasking APs is incredibly cheap and fast: in the minimal case, one memory write suffices to launch an AP into doing work.
This code has been tested on a Sandy Bridge system (Chromebook) and on another 4-core Sandy Bridge system.
Take the opportunity to remove the macro definition whose origins few people remember. Change-Id: I19f8587562fd499e98457aa9f11b52400c105697 Signed-off-by: Ron Minnich rminnich@gmail.com --- src/arch/x86/include/arch/cpu.h | 5 + src/arch/x86/lib/cpu.c | 28 ++++++- src/cpu/x86/lapic/lapic_cpu_init.c | 147 ++++++++++++++++++++++++++++++++---- src/cpu/x86/lapic/secondary.S | 2 + src/include/cpu/cpu.h | 8 ++- 5 files changed, 169 insertions(+), 21 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h index 604abde..34d0856 100644 --- a/src/arch/x86/include/arch/cpu.h +++ b/src/arch/x86/include/arch/cpu.h @@ -157,9 +157,14 @@ struct cpu_driver { struct device; struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32); + struct cpu_info { device_t cpu; unsigned long index; + workfunc work; + u32 params[3]; + u32 result; };
static inline struct cpu_info *cpu_info(void) diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c index 98ede06..29ccccc 100644 --- a/src/arch/x86/lib/cpu.c +++ b/src/arch/x86/lib/cpu.c @@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu) cpu->ops = driver ? driver->ops : NULL; }
-void cpu_initialize(void) +void cpu_initialize(struct cpu_info *info) { /* Because we busy wait at the printk spinlock. * It is important to keep the number of printed messages @@ -242,12 +242,9 @@ void cpu_initialize(void) * disabled. */ struct device *cpu; - struct cpu_info *info; struct cpuinfo_x86 c;
- info = cpu_info(); - - printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index); + printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu; if (!cpu) { @@ -289,3 +286,24 @@ void cpu_initialize(void) return; }
+void cpu_work(struct cpu_info *info) +{ + workfunc f; + volatile workfunc *ptr = &info->work; + volatile u32 *params = info->params; + + printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index); + + while (!*ptr) + ; + f = *ptr; + + printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f); + info->result = f(params[0], params[1], params[2]); + + printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr); + *ptr = 0; + + printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index); +} + diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c index 2ac9093..227cb06 100644 --- a/src/cpu/x86/lapic/lapic_cpu_init.c +++ b/src/cpu/x86/lapic/lapic_cpu_init.c @@ -17,6 +17,20 @@ #include <cpu/intel/speedstep.h>
#if CONFIG_SMP == 1 + +/* We do not want this struct to be visible outside this file. + * The external interface is via functions. + */ + +struct apcore { + u32 stack[4096 - sizeof(struct cpu_info)]; + struct cpu_info info; +}; + +#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[x].stack)-1]) + +struct apcore apcores[CONFIG_MAX_CPUS]; + /* This is a lot more paranoid now, since Linux can NOT handle * being told there is a CPU when none exists. So any errors * will return 0, meaning no CPU. @@ -221,19 +235,22 @@ static atomic_t active_cpus = ATOMIC_INIT(1); * start_cpu returns. */
+/* N.B. if we move to serial smp init we don't need this spin lock. + * Consider for the future. + */ + static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED; -static unsigned last_cpu_index = 0; -volatile unsigned long secondary_stack; +static unsigned int last_cpu_index = 0; +volatile unsigned long secondary_stack, secondary_cpu_info;
int start_cpu(device_t cpu) { - extern unsigned char _estack[]; struct cpu_info *info; unsigned long stack_end; unsigned long apicid; - unsigned long index; unsigned long count; int result; + unsigned int index = last_cpu_index + 1;
spin_lock(&start_cpu_lock);
@@ -241,19 +258,27 @@ int start_cpu(device_t cpu) apicid = cpu->path.apic.apic_id;
/* Get an index for the new processor */ - index = ++last_cpu_index; + if (index >= CONFIG_MAX_CPUS){ + printk(BIOS_ERR, "CONFIG_MAX_CPUS(%d) too small!\n", CONFIG_MAX_CPUS); + spin_unlock(&start_cpu_lock); + return 0; + } + + last_cpu_index = index;
/* Find end of the new processors stack */ - stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info); + stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */ - info = (struct cpu_info *)stack_end; + info = &apcores[index].info; info->index = index; info->cpu = cpu;
/* Advertise the new stack to start_cpu */ secondary_stack = stack_end; - + secondary_cpu_info = (unsigned long) info; + printk(BIOS_SPEW, "start_cpu CPU %d secondary_stack %#lx info %p\n", + index, secondary_stack, info); /* Until the cpu starts up report the cpu is not enabled */ cpu->enabled = 0; cpu->initialized = 0; @@ -381,7 +406,7 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat #endif
/* C entry point of secondary cpus */ -void secondary_cpu_init(void) +void secondary_cpu_init(struct cpu_info *info) { atomic_inc(&active_cpus); #if CONFIG_SERIAL_CPU_INIT == 1 @@ -398,11 +423,11 @@ void secondary_cpu_init(void) cr4_val |= (1 << 9 | 1 << 10); writecr4(cr4_val); #endif - cpu_initialize(); + cpu_initialize(info); #if CONFIG_SERIAL_CPU_INIT == 1 spin_unlock(&start_cpu_lock); #endif - + cpu_work(info); atomic_dec(&active_cpus);
stop_this_cpu(); @@ -476,8 +501,6 @@ static void wait_other_cpus_stop(struct bus *cpu_bus) printk(BIOS_DEBUG, "All AP CPUs stopped (%ld loops)\n", loopcount); }
-#else /* CONFIG_SMP */ -#define initialize_other_cpus(root) do {} while(0) #endif /* CONFIG_SMP */
void initialize_cpus(struct bus *cpu_bus) @@ -522,7 +545,7 @@ void initialize_cpus(struct bus *cpu_bus) #endif
/* Initialize the bootstrap processor */ - cpu_initialize(); + cpu_initialize(info);
#if CONFIG_SMP == 1 #if CONFIG_SERIAL_CPU_INIT == 1 @@ -534,3 +557,99 @@ void initialize_cpus(struct bus *cpu_bus) #endif }
+#if CONFIG_SMP == 1 +/* work primitives */ + +/* start_work is only intended to be called by the bsp. */ +/* A built in assumption of this code is that you know what + * you're doing. This is firmware, not pthreads. You should + * not call this function for a core if: + * - the core does not exist + * - the core is not initialized + * - the core is busy + * Any of these return a -1, else the core is started and + * 0 is returned. + */ +static int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c) +{ + struct cpu_info *info; + volatile workfunc *ptr; + volatile u32 *params; + + if (core >= CONFIG_MAX_CPUS){ + printk(BIOS_EMERG, "start_work: invalid core %d\n", core); + return -1; + } + info = &apcores[core].info; + if (! info->cpu->initialized){ + printk(BIOS_EMERG, "start_work: core not initialized %d\n", core); + return -1; + } + ptr = &info->work; + if (*ptr){ + printk(BIOS_EMERG, "start_work: core is busy %d\n", core); + return -1; + } + params = (u32 *)&info->params; + params[0] = a; + params[1] = b; + params[2] = c; + + printk(BIOS_INFO, "BSP starts work on core %d\n", core); + + /* This interface depends on arguments being written, + * then the function pointer being written. + */ + barrier(); + *ptr = f; + return 0; +} + +/* wait for the work to finish and return the result. Wait at + * most maxusec microseconds. + */ +static int wait_work(unsigned int core, u32 *retval, unsigned int maxusec) +{ + unsigned int usec; + struct cpu_info *info; + volatile workfunc *ptr; + u32 result; + + if (core >= CONFIG_MAX_CPUS){ + printk(BIOS_EMERG, "start_work: invalid core %d\n", core); + return -1; + } + info = &apcores[core].info; + if (! info->cpu->initialized){ + printk(BIOS_EMERG, "start_work: core not initialized %d\n", core); + return -1; + } + ptr = &info->work; + for(usec = 0; usec < maxusec && *ptr; usec++) + udelay(1); + + /* N.B. 
since only the BSP starts cores, there is not + * problem checking the pointer since access to it is + * serialized by the single-threaded BSP code. + */ + if (*ptr){ + printk(BIOS_INFO, "core %d still running after %d microseconds\n", + core, usec); + return -1; + } + result = info->result; + printk(BIOS_INFO, "Result is %#x\n", result); + if (retval) + *retval = result; + return 0; +} + +/* start the work and wait for it to finish */ +int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval, + unsigned int timeout) +{ + if (start_work(core, f, a, b, c)) + return -1; + return wait_work(core, retval, timeout); +} +#endif /* CONFIG_SMP == 1 */ diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S index dc00b08..84e0c6e 100644 --- a/src/cpu/x86/lapic/secondary.S +++ b/src/cpu/x86/lapic/secondary.S @@ -42,6 +42,8 @@ _secondary_start: xorl %eax, %eax movl secondary_stack, %esp movl %eax, secondary_stack + movl secondary_cpu_info, %eax + pushl %eax
call secondary_cpu_init 1: hlt diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h index cca2be1..362c53c 100644 --- a/src/include/cpu/cpu.h +++ b/src/include/cpu/cpu.h @@ -5,9 +5,13 @@ struct device; struct bus; #include <arch/cpu.h>
-void cpu_initialize(void); +void cpu_initialize(struct cpu_info *info); void initialize_cpus(struct bus *cpu_bus); -void secondary_cpu_init(void); +void secondary_cpu_init(struct cpu_info *info); +int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval, + unsigned int timeout); +void cpu_work(struct cpu_info *info); +
#if !CONFIG_WAIT_BEFORE_CPUS_INIT #define cpus_ready_for_init() do {} while(0)