Ronald G. Minnich (rminnich(a)gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit 83d27ab81445e3f9cc88ed77ec52800c5c7b4321
Author: Ron Minnich <rminnich(a)gmail.com>
Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot.
To give people a chance to examine the structure, and to make bisecting
for bugs easier, we are implementing the patch in 3 stages. This first stage
introduces the basic mechanism but does not make any visible change in
behavior. The APs, instead of being immediately spun down, are asked to
run one debug print function and then spun down.
The means by which APs are tasked was implemented in the NIX operating
system in 2011, see: http://code.google.com/p/nix-os/
The APs come alive and spin on a memory location, contained
in a per-AP structure. Tasking is accomplished
by setting parameters into an argument array and then writing a function
pointer into the memory location. The AP indicates completion by writing
zero to the memory location. The BSP can wait for the AP to finish
(synchronous) or go off to do other work (asynchronous). Wait times
are in units of microseconds.
This way of tasking APs is incredibly cheap and fast: in the minimal
case, one memory write suffices to launch an AP into doing work.
This code has been tested on a sandybridge system (chromebook)
and on another 4-core sandybridge system.
Take the opportunity to remove the initialize_other_cpus() macro
definition, whose origins few people remember.
Change-Id: I19f8587562fd499e98457aa9f11b52400c105697
Signed-off-by: Ron Minnich <rminnich(a)gmail.com>
---
src/arch/x86/include/arch/cpu.h | 5 ++
src/arch/x86/lib/cpu.c | 30 +++++++--
src/cpu/x86/lapic/lapic_cpu_init.c | 137 +++++++++++++++++++++++++++++++++---
src/include/cpu/cpu.h | 10 ++-
4 files changed, 165 insertions(+), 17 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h
index 604abde..34d0856 100644
--- a/src/arch/x86/include/arch/cpu.h
+++ b/src/arch/x86/include/arch/cpu.h
@@ -157,9 +157,14 @@ struct cpu_driver {
struct device;
struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32);
+
struct cpu_info {
device_t cpu;
unsigned long index;
+ workfunc work;
+ u32 params[3];
+ u32 result;
};
static inline struct cpu_info *cpu_info(void)
diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c
index 98ede06..ad028e3 100644
--- a/src/arch/x86/lib/cpu.c
+++ b/src/arch/x86/lib/cpu.c
@@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu)
cpu->ops = driver ? driver->ops : NULL;
}
-void cpu_initialize(void)
+void cpu_initialize(struct cpu_info *info)
{
/* Because we busy wait at the printk spinlock.
* It is important to keep the number of printed messages
@@ -242,12 +242,9 @@ void cpu_initialize(void)
* disabled.
*/
struct device *cpu;
- struct cpu_info *info;
struct cpuinfo_x86 c;
- info = cpu_info();
-
- printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index);
+ printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu;
if (!cpu) {
@@ -289,3 +286,26 @@ void cpu_initialize(void)
return;
}
+void cpu_work(struct cpu_info *info)
+{
+ workfunc f;
+ volatile workfunc *ptr = &info->work;
+ volatile u32 *params = info->params;
+
+ printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index);
+
+ while (!*ptr)
+ ;
+ f = *ptr;
+
+ printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f);
+ f(params[0], params[1], params[2]);
+
+ printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr);
+ *ptr = 0;
+
+ printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index);
+
+ return;
+}
+
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 2ac9093..f54f07c 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -17,6 +17,21 @@
#include <cpu/intel/speedstep.h>
#if CONFIG_SMP == 1
+
+/* we do not want this struct visible outside this file.
+ * The external interface is via functions.
+ */
+
+struct apcore {
+ u32 stack[1024 - sizeof(struct cpu_info)];
+ struct cpu_info info;
+};
+
+#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[x].stack)-1])
+typedef struct apcore apcore_t;
+
+apcore_t apcores[CONFIG_MAX_CPUS];
+
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
@@ -221,13 +236,16 @@ static atomic_t active_cpus = ATOMIC_INIT(1);
* start_cpu returns.
*/
+/* N.B. if we move to serial smp init we don't need this spin lock.
+ * Consider for the future.
+ */
+
static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED;
static unsigned last_cpu_index = 0;
volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
- extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
@@ -241,19 +259,26 @@ int start_cpu(device_t cpu)
apicid = cpu->path.apic.apic_id;
/* Get an index for the new processor */
+ if ((last_cpu_index+1) >= CONFIG_MAX_CPUS){
+ printk(BIOS_ERR, "CONFIG_MAX_CPUS(%d) too small!\n", CONFIG_MAX_CPUS);
+ spin_unlock(&start_cpu_lock);
+ return 0;
+ }
+
index = ++last_cpu_index;
/* Find end of the new processors stack */
- stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info);
+ stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */
- info = (struct cpu_info *)stack_end;
+ info = &apcores[index].info;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
-
+ printk(BIOS_SPEW, "start_cpu CPU %ld secondary_stack %#lx info %p\n",
+ index, secondary_stack, info);
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
@@ -381,8 +406,11 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat
#endif
/* C entry point of secondary cpus */
-void secondary_cpu_init(void)
+void secondary_cpu_init(u32 infoptr)
{
+ /* note: we tried (((u32 *)&infoptr)[1]) but that failed. Compiler? */
+ struct cpu_info *info = (struct cpu_info *)(((u8*)&infoptr)+sizeof(u32));
+
atomic_inc(&active_cpus);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_lock(&start_cpu_lock);
@@ -398,11 +426,11 @@ void secondary_cpu_init(void)
cr4_val |= (1 << 9 | 1 << 10);
writecr4(cr4_val);
#endif
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_unlock(&start_cpu_lock);
#endif
-
+ cpu_work(info);
atomic_dec(&active_cpus);
stop_this_cpu();
@@ -476,8 +504,6 @@ static void wait_other_cpus_stop(struct bus *cpu_bus)
printk(BIOS_DEBUG, "All AP CPUs stopped (%ld loops)\n", loopcount);
}
-#else /* CONFIG_SMP */
-#define initialize_other_cpus(root) do {} while(0)
#endif /* CONFIG_SMP */
void initialize_cpus(struct bus *cpu_bus)
@@ -505,6 +531,7 @@ void initialize_cpus(struct bus *cpu_bus)
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
#if CONFIG_SMP == 1
+ memset(apcores, 0, sizeof(*apcores));
copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
#endif
@@ -522,7 +549,7 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
/* Initialize the bootstrap processor */
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SMP == 1
#if CONFIG_SERIAL_CPU_INIT == 1
@@ -534,3 +561,93 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
}
+#if CONFIG_SMP == 1
+/* work primitives */
+
+/* start_work is only intended to be called by the bsp. */
+/* A built in assumption of this code is that you know what
+ * you're doing. This is firmware, not pthreads. You should
+ * not call this function for a core if:
+ * - the core does not exist
+ * - the core is not initialized
+ * - the core is busy
+ * Any of these return a -1, else the core is started and
+ * 0 is returned.
+ */
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c)
+{
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *params;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ if (*ptr){
+ printk(BIOS_EMERG, "start_work: core is busy %d\n", core);
+ return -1;
+ }
+ params = (u32 *)&info->params;
+ params[0] = a;
+ params[1] = b;
+ params[2] = c;
+ printk(BIOS_INFO, "BSP starts work on core %d\n", core);
+ *ptr = f;
+ return 0;
+}
+
+/* wait for the work to finish and return the result. Wait at
+ * most maxusec microseconds.
+ */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxusec)
+{
+ unsigned int usec;
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *result;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ for(usec = 0; usec < maxusec && *ptr; usec++)
+ udelay(1);
+
+ /* N.B. since only the BSP starts cores, there is not
+ * problem checking the pointer since access to it is
+ * serialized by the single-threaded BSP code.
+ */
+ if (*ptr){
+ printk(BIOS_INFO, "core %d still running after %d microseconds\n",
+ core, usec);
+ return -1;
+ }
+ result = &info->result;
+ printk(BIOS_INFO, "info->work after result is %#lx\n", (unsigned long)*result);
+ if (retval)
+ *retval = *result;
+ return 0;
+}
+
+/* start the work and wait for it to finish */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout)
+{
+ if (start_work(core, f, a, b, c))
+ return -1;
+ return wait_work(core, retval, timeout);
+}
+#endif /* CONFIG_SMP == 1 */
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h
index cca2be1..0b7a748 100644
--- a/src/include/cpu/cpu.h
+++ b/src/include/cpu/cpu.h
@@ -5,9 +5,15 @@ struct device;
struct bus;
#include <arch/cpu.h>
-void cpu_initialize(void);
+void cpu_initialize(struct cpu_info *info);
void initialize_cpus(struct bus *cpu_bus);
-void secondary_cpu_init(void);
+void secondary_cpu_init(u32 unused);
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c);
+int wait_work(unsigned int core, u32 *retval, unsigned int maxusec);
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout);
+void cpu_work(struct cpu_info *info);
+
#if !CONFIG_WAIT_BEFORE_CPUS_INIT
#define cpus_ready_for_init() do {} while(0)
Ronald G. Minnich (rminnich(a)gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit 7ebde200d7acc1409b952b738c41be28ddf9a4e7
Author: Ron Minnich <rminnich(a)gmail.com>
Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot.
To give people a chance to examine the structure, and to make bisecting
for bugs easier, we are implementing the patch in 3 stages. This first stage
introduces the basic mechanism but does not make any visible change in
behavior. The APs, instead of being immediately spun down, are asked to
run one debug print function and then spun down.
The means by which APs are tasked was implemented in the NIX operating
system in 2011, see: http://code.google.com/p/nix-os/
The APs come alive and spin on a memory location, contained
in a per-AP structure. Tasking is accomplished
by setting parameters into an argument array and then writing a function
pointer into the memory location. The AP indicates completion by writing
zero to the memory location. The BSP can wait for the AP to finish
(synchronous) or go off to do other work (asynchronous). Wait times
are in units of microseconds.
This way of tasking APs is incredibly cheap and fast: in the minimal
case, one memory write suffices to launch an AP into doing work.
This code has been tested on a sandybridge system (chromebook)
and on another 4-core sandybridge system.
Take the opportunity to remove the initialize_other_cpus() macro
definition, whose origins few people remember.
Change-Id: I19f8587562fd499e98457aa9f11b52400c105697
Signed-off-by: Ron Minnich <rminnich(a)gmail.com>
---
src/arch/x86/include/arch/cpu.h | 5 ++
src/arch/x86/lib/cpu.c | 30 +++++++--
src/cpu/x86/lapic/lapic_cpu_init.c | 132 +++++++++++++++++++++++++++++++++---
src/include/cpu/cpu.h | 10 ++-
4 files changed, 160 insertions(+), 17 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h
index 604abde..34d0856 100644
--- a/src/arch/x86/include/arch/cpu.h
+++ b/src/arch/x86/include/arch/cpu.h
@@ -157,9 +157,14 @@ struct cpu_driver {
struct device;
struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32);
+
struct cpu_info {
device_t cpu;
unsigned long index;
+ workfunc work;
+ u32 params[3];
+ u32 result;
};
static inline struct cpu_info *cpu_info(void)
diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c
index 98ede06..ad028e3 100644
--- a/src/arch/x86/lib/cpu.c
+++ b/src/arch/x86/lib/cpu.c
@@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu)
cpu->ops = driver ? driver->ops : NULL;
}
-void cpu_initialize(void)
+void cpu_initialize(struct cpu_info *info)
{
/* Because we busy wait at the printk spinlock.
* It is important to keep the number of printed messages
@@ -242,12 +242,9 @@ void cpu_initialize(void)
* disabled.
*/
struct device *cpu;
- struct cpu_info *info;
struct cpuinfo_x86 c;
- info = cpu_info();
-
- printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index);
+ printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu;
if (!cpu) {
@@ -289,3 +286,26 @@ void cpu_initialize(void)
return;
}
+void cpu_work(struct cpu_info *info)
+{
+ workfunc f;
+ volatile workfunc *ptr = &info->work;
+ volatile u32 *params = info->params;
+
+ printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index);
+
+ while (!*ptr)
+ ;
+ f = *ptr;
+
+ printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f);
+ f(params[0], params[1], params[2]);
+
+ printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr);
+ *ptr = 0;
+
+ printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index);
+
+ return;
+}
+
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 2ac9093..aca429a 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -17,6 +17,21 @@
#include <cpu/intel/speedstep.h>
#if CONFIG_SMP == 1
+
+/* we do not want this struct visible outside this file.
+ * The external interface is via functions.
+ */
+
+struct apcore {
+ u32 stack[1024 - sizeof(struct cpu_info)];
+ struct cpu_info info;
+};
+
+#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[x].stack)-1])
+typedef struct apcore apcore_t;
+
+apcore_t apcores[CONFIG_MAX_CPUS];
+
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
@@ -227,7 +242,6 @@ volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
- extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
@@ -241,19 +255,25 @@ int start_cpu(device_t cpu)
apicid = cpu->path.apic.apic_id;
/* Get an index for the new processor */
+ if (last_cpu_index >= CONFIG_MAX_CPUS){
+ printk(BIOS_ERR, "CONFIG_MAX_CPUS(%d) too small!\n", CONFIG_MAX_CPUS);
+ return 0;
+ }
+
index = ++last_cpu_index;
/* Find end of the new processors stack */
- stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info);
+ stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */
- info = (struct cpu_info *)stack_end;
+ info = &apcores[index].info;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
-
+ printk(BIOS_SPEW, "start_cpu CPU %ld secondary_stack %#lx info %p\n",
+ index, secondary_stack, info);
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
@@ -381,8 +401,11 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat
#endif
/* C entry point of secondary cpus */
-void secondary_cpu_init(void)
+void secondary_cpu_init(u32 infoptr)
{
+ /* note: we tried (((u32 *)&infoptr)[1]) but that failed. Compiler? */
+ struct cpu_info *info = (struct cpu_info *)(((u8*)&infoptr)+sizeof(u32));
+
atomic_inc(&active_cpus);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_lock(&start_cpu_lock);
@@ -398,11 +421,11 @@ void secondary_cpu_init(void)
cr4_val |= (1 << 9 | 1 << 10);
writecr4(cr4_val);
#endif
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_unlock(&start_cpu_lock);
#endif
-
+ cpu_work(info);
atomic_dec(&active_cpus);
stop_this_cpu();
@@ -476,8 +499,6 @@ static void wait_other_cpus_stop(struct bus *cpu_bus)
printk(BIOS_DEBUG, "All AP CPUs stopped (%ld loops)\n", loopcount);
}
-#else /* CONFIG_SMP */
-#define initialize_other_cpus(root) do {} while(0)
#endif /* CONFIG_SMP */
void initialize_cpus(struct bus *cpu_bus)
@@ -505,6 +526,7 @@ void initialize_cpus(struct bus *cpu_bus)
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
#if CONFIG_SMP == 1
+ memset(apcores, 0, sizeof(*apcores));
copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
#endif
@@ -522,7 +544,7 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
/* Initialize the bootstrap processor */
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SMP == 1
#if CONFIG_SERIAL_CPU_INIT == 1
@@ -534,3 +556,93 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
}
+#if CONFIG_SMP == 1
+/* work primitives */
+
+/* start_work is only intended to be called by the bsp. */
+/* A built in assumption of this code is that you know what
+ * you're doing. This is firmware, not pthreads. You should
+ * not call this function for a core if:
+ * - the core does not exist
+ * - the core is not initialized
+ * - the core is busy
+ * Any of these return a -1, else the core is started and
+ * 0 is returned.
+ */
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c)
+{
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *params;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ if (*ptr){
+ printk(BIOS_EMERG, "start_work: core is busy %d\n", core);
+ return -1;
+ }
+ params = (u32 *)&info->params;
+ params[0] = a;
+ params[1] = b;
+ params[2] = c;
+ printk(BIOS_INFO, "BSP starts work on core %d\n", core);
+ *ptr = f;
+ return 0;
+}
+
+/* wait for the work to finish and return the result. Wait at
+ * most maxusec microseconds.
+ */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxusec)
+{
+ unsigned int usec;
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *result;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ for(usec = 0; usec < maxusec && *ptr; usec++)
+ udelay(1);
+
+ /* N.B. since only the BSP starts cores, there is not
+ * problem checking the pointer since access to it is
+ * serialized by the single-threaded BSP code.
+ */
+ if (*ptr){
+ printk(BIOS_INFO, "core %d still running after %d microseconds\n",
+ core, usec);
+ return -1;
+ }
+ result = &info->result;
+ printk(BIOS_INFO, "info->work after result is %#lx\n", (unsigned long)*result);
+ if (retval)
+ *retval = *result;
+ return 0;
+}
+
+/* start the work and wait for it to finish */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout)
+{
+ if (start_work(core, f, a, b, c))
+ return -1;
+ return wait_work(core, retval, timeout);
+}
+#endif /* CONFIG_SMP == 1 */
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h
index cca2be1..0b7a748 100644
--- a/src/include/cpu/cpu.h
+++ b/src/include/cpu/cpu.h
@@ -5,9 +5,15 @@ struct device;
struct bus;
#include <arch/cpu.h>
-void cpu_initialize(void);
+void cpu_initialize(struct cpu_info *info);
void initialize_cpus(struct bus *cpu_bus);
-void secondary_cpu_init(void);
+void secondary_cpu_init(u32 unused);
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c);
+int wait_work(unsigned int core, u32 *retval, unsigned int maxusec);
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout);
+void cpu_work(struct cpu_info *info);
+
#if !CONFIG_WAIT_BEFORE_CPUS_INIT
#define cpus_ready_for_init() do {} while(0)
the following patch was just integrated into master:
commit b55e2f4611720891e645c719b1ec040ae00f88b0
Author: Jonathan A. Kollasch <jakllsch(a)kollasch.net>
Date: Sun Apr 8 11:32:34 2012 -0500
Actually return %ebx value from cpuid_ebx()
Change-Id: I75f8f942950cad94439a10e389490ecfdd9272fe
Signed-off-by: Jonathan A. Kollasch <jakllsch(a)kollasch.net>
Build-Tested: build bot (Jenkins) at Sun Apr 8 18:51:51 2012, giving +1
Reviewed-By: Stefan Reinauer <stefan.reinauer(a)coreboot.org> at Sun Apr 8 20:06:57 2012, giving +2
See http://review.coreboot.org/880 for details.
-gerrit
Ronald G. Minnich (rminnich(a)gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit 39764b7b5f5f997928ffca01fd89469d95016f4e
Author: Ron Minnich <rminnich(a)gmail.com>
Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot.
To give people a chance to examine the structure, and to make bisecting
for bugs easier, we are implementing the patch in 3 stages. This first stage
introduces the basic mechanism but does not make any visible change in
behavior. The APs, instead of being immediately spun down, are asked to
run one debug print function and then spun down.
The means by which APs are tasked was implemented in the NIX operating
system in 2011, see: http://code.google.com/p/nix-os/
The APs come alive and spin on a memory location, contained
in a per-AP structure. Tasking is accomplished
by setting parameters into an argument array and then writing a function
pointer into the memory location. The AP indicates completion by writing
zero to the memory location. The BSP can wait for the AP to finish
(synchronous) or go off to do other work (asynchronous).
This way of tasking APs is incredibly cheap and fast: in the minimal
case, one memory write suffices to launch an AP into doing work.
This code has been tested on a sandybridge system (chromebook)
and on another 4-core sandybridge system.
Change-Id: I19f8587562fd499e98457aa9f11b52400c105697
Signed-off-by: Ron Minnich <rminnich(a)gmail.com>
---
src/arch/x86/include/arch/cpu.h | 5 ++
src/arch/x86/lib/cpu.c | 30 ++++++++--
src/cpu/x86/lapic/lapic_cpu_init.c | 110 +++++++++++++++++++++++++++++++++---
src/include/cpu/cpu.h | 10 +++-
4 files changed, 140 insertions(+), 15 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h
index 604abde..34d0856 100644
--- a/src/arch/x86/include/arch/cpu.h
+++ b/src/arch/x86/include/arch/cpu.h
@@ -157,9 +157,14 @@ struct cpu_driver {
struct device;
struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32);
+
struct cpu_info {
device_t cpu;
unsigned long index;
+ workfunc work;
+ u32 params[3];
+ u32 result;
};
static inline struct cpu_info *cpu_info(void)
diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c
index 98ede06..ad028e3 100644
--- a/src/arch/x86/lib/cpu.c
+++ b/src/arch/x86/lib/cpu.c
@@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu)
cpu->ops = driver ? driver->ops : NULL;
}
-void cpu_initialize(void)
+void cpu_initialize(struct cpu_info *info)
{
/* Because we busy wait at the printk spinlock.
* It is important to keep the number of printed messages
@@ -242,12 +242,9 @@ void cpu_initialize(void)
* disabled.
*/
struct device *cpu;
- struct cpu_info *info;
struct cpuinfo_x86 c;
- info = cpu_info();
-
- printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index);
+ printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu;
if (!cpu) {
@@ -289,3 +286,26 @@ void cpu_initialize(void)
return;
}
+void cpu_work(struct cpu_info *info)
+{
+ workfunc f;
+ volatile workfunc *ptr = &info->work;
+ volatile u32 *params = info->params;
+
+ printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index);
+
+ while (!*ptr)
+ ;
+ f = *ptr;
+
+ printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f);
+ f(params[0], params[1], params[2]);
+
+ printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr);
+ *ptr = 0;
+
+ printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index);
+
+ return;
+}
+
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 2ac9093..e425884 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -17,6 +17,21 @@
#include <cpu/intel/speedstep.h>
#if CONFIG_SMP == 1
+
+/* we do not want this struct visible outside this file.
+ * The external interface is via functions.
+ */
+
+struct apcore {
+ u32 stack[1024 - sizeof(struct cpu_info)];
+ struct cpu_info info;
+};
+
+#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[x].stack)-1])
+typedef struct apcore apcore_t;
+
+apcore_t apcores[CONFIG_MAX_CPUS];
+
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
@@ -227,7 +242,6 @@ volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
- extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
@@ -241,19 +255,24 @@ int start_cpu(device_t cpu)
apicid = cpu->path.apic.apic_id;
/* Get an index for the new processor */
+ /* N.B. No checking against MAX_CPUS here.
+ * This function gets called if all those
+ * checks have been passed.
+ */
index = ++last_cpu_index;
/* Find end of the new processors stack */
- stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info);
+ stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */
- info = (struct cpu_info *)stack_end;
+ info = &apcores[index].info;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
-
+ printk(BIOS_SPEW, "start_cpu CPU %ld secondary_stack %#lx info %p\n",
+ index, secondary_stack, info);
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
@@ -381,8 +400,11 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat
#endif
/* C entry point of secondary cpus */
-void secondary_cpu_init(void)
+void secondary_cpu_init(u32 infoptr)
{
+ /* note: we tried (((u32 *)&infoptr)[1]) but that failed. Compiler? */
+ struct cpu_info *info = (struct cpu_info *)(((u8*)&infoptr)+sizeof(u32));
+
atomic_inc(&active_cpus);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_lock(&start_cpu_lock);
@@ -398,11 +420,11 @@ void secondary_cpu_init(void)
cr4_val |= (1 << 9 | 1 << 10);
writecr4(cr4_val);
#endif
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_unlock(&start_cpu_lock);
#endif
-
+ cpu_work(info);
atomic_dec(&active_cpus);
stop_this_cpu();
@@ -505,6 +527,7 @@ void initialize_cpus(struct bus *cpu_bus)
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
#if CONFIG_SMP == 1
+ memset(apcores, 0, sizeof(*apcores));
copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
#endif
@@ -522,7 +545,7 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
/* Initialize the bootstrap processor */
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SMP == 1
#if CONFIG_SERIAL_CPU_INIT == 1
@@ -534,3 +557,74 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
}
+#if CONFIG_SMP == 1
+/* work primitives */
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c)
+{
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *params;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ if (*ptr){
+ printk(BIOS_EMERG, "start_work: core is busy %d\n", core);
+ return -1;
+ }
+ params = (u32 *)&info->params;
+ params[0] = a;
+ params[1] = b;
+ params[2] = c;
+ printk(BIOS_INFO, "BSP starts work on core %d\n", core);
+ *ptr = f;
+ return 0;
+}
+
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait)
+{
+ int i = 0;
+ struct cpu_info *info;
+ volatile workfunc *ptr;
+ volatile u32 *result;
+
+ if (core >= CONFIG_MAX_CPUS){
+ printk(BIOS_EMERG, "start_work: invalid core %d\n", core);
+ return -1;
+ }
+ info = &apcores[core].info;
+ if (! info->cpu->initialized){
+ printk(BIOS_EMERG, "start_work: core not initialized %d\n", core);
+ return -1;
+ }
+ ptr = &info->work;
+ while (i++ < maxwait && *ptr)
+ if (i%100000 == 0) printk(BIOS_SPEW, "still waiting on %p at %d\n",
+ ptr, i);
+ if (*ptr){
+ printk(BIOS_INFO, "core %d still running after %d iterations\n",
+ core, i);
+ return -1;
+ }
+ result = &info->result;
+ printk(BIOS_INFO, "info->work after result is %#lx\n", (unsigned long)*result);
+ if (retval)
+ *retval = *result;
+ return 0;
+}
+
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout)
+{
+ if (start_work(core, f, a, b, c))
+ return -1;
+ return wait_work(core, retval, timeout);
+}
+#endif /* CONFIG_SMP == 1 */
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h
index cca2be1..39f2fa2 100644
--- a/src/include/cpu/cpu.h
+++ b/src/include/cpu/cpu.h
@@ -5,9 +5,15 @@ struct device;
struct bus;
#include <arch/cpu.h>
-void cpu_initialize(void);
+void cpu_initialize(struct cpu_info *info);
void initialize_cpus(struct bus *cpu_bus);
-void secondary_cpu_init(void);
+void secondary_cpu_init(u32 unused);
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c);
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait);
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout);
+void cpu_work(struct cpu_info *info);
+
#if !CONFIG_WAIT_BEFORE_CPUS_INIT
#define cpus_ready_for_init() do {} while(0)
Am 06.04.2012 22:38 schrieb ron minnich:
> On Fri, Apr 6, 2012 at 11:12 AM, Marc Jones <marcj303(a)gmail.com> wrote:
>
>>> One such nice app would be a zmodem download of raminit.
>> This is an interesting thought, but really a debug/development
>> feature.
> We even talked about this in v3. But it never got done, seems there is
> no interest.
I think it didn't get done for the same reason we had to abandon v3:
lack of manpower. A few highly motivated developers spent pretty much
all of their time on v3, but bringing a completely new design and
codebase from zero to production was simply too big of a task, yielding
stable Qemu and GeodeLX ports but not much for other hardware. At the
same time, v2 was moving along and getting new hardware support all the
time.
In the end, it was decided that the v2 codebase should get some of the
designs proven in v3, and thus v4 was born.
There is still some stuff I would love to prototype in v3 and then move
to v4, and downloading raminit is one of those features. However, I
won't have sufficient spare time this year to attempt that.
>> I am not convinced of the value of earlier serial console. It adds
>> complexity where things should be simple. I hesitate to continue
>> moving more code before CAR. We have had a long standing goal to
>> reduce code before CAR.
> CAR is fragile! I agree. It should be reduced as much as possible.
>
> You may want to look at what we did in v3, it has some similarities to
> your ideas. And, we dropped it :-)
We already have multiple pre-CAR serial consoles if you want to build
them: llshell, SerialICE and a few others. The trick is to integrate
them into the coreboot execution flow in a way which lets the boot
continue automatically in the normal case, and allow console access when
you need it.
>>> This also has a CAR and RAM environment (GCC-build) that can execute XIP
>>> stages from Flash ROM or decompress stages to CAR/RAM from Flash ROM.
>>
>> At least with AMD processors, you can't execute code out of the data
>> cache. The only way to get instructions cached is with a code fetch,
>> which doesn't hit the data cache.
By the way, if you're into really weird stuff, you could try to work
around the apparent "data writes hit dcache, not icache" limitation by
running the downloaded code in x86emu in CAR because for x86emu the
emulated instructions are data. You'll need flameproof underwear if you
plan to submit such a patch, though.
Regards,
Carl-Daniel
--
http://www.hailfinger.org/
Ronald G. Minnich (rminnich(a)gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit 9f3f26668c3332aaeb0bde1b6cda80d363496da2
Author: Ron Minnich <rminnich(a)gmail.com>
Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot.
To give people a chance to examine the structure, and to make bisecting
for bugs easier, we are implementing the patch in 3 stages. This first stage
introduces the basic mechanism but does not make any visible change in
behavior. The APs, instead of being immediately spun down, are asked to
run one debug print function and then spun down.
The means by which APs are tasked was implemented in the NIX operating
system in 2011, see: http://code.google.com/p/nix-os/
The APs come alive and spin on a memory location, contained
in a per-AP structure. Tasking is accomplished
by setting parameters into an argument array and then writing a function
pointer into the memory location. The AP indicates completion by writing
zero to the memory location. The BSP can wait for the AP to finish
(synchronous) or go off to do other work (asynchronous).
This way of tasking APs is incredibly cheap and fast: in the minimal
case, one memory write suffices to launch an AP into doing work.
This code has been tested on a sandybridge system (chromebook)
and on another 4-core sandybridge system.
Change-Id: I19f8587562fd499e98457aa9f11b52400c105697
Signed-off-by: Ron Minnich <rminnich(a)gmail.com>
---
src/arch/x86/include/arch/cpu.h | 5 ++
src/arch/x86/lib/cpu.c | 30 ++++++++--
src/cpu/x86/lapic/lapic_cpu_init.c | 108 +++++++++++++++++++++++++++++++++---
src/include/cpu/cpu.h | 10 +++-
4 files changed, 137 insertions(+), 16 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h
index 604abde..34d0856 100644
--- a/src/arch/x86/include/arch/cpu.h
+++ b/src/arch/x86/include/arch/cpu.h
@@ -157,9 +157,14 @@ struct cpu_driver {
struct device;
struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32); /* work item: takes three u32 arguments, returns a u32 */
+
struct cpu_info {
device_t cpu;
unsigned long index;
+ workfunc work; /* mailbox: non-zero is the function the AP should run; cpu_work() writes 0 when done */
+ u32 params[3]; /* arguments for 'work', filled in by start_work() before 'work' is set */
+ u32 result; /* value wait_work() hands back through its retval pointer */
};
static inline struct cpu_info *cpu_info(void)
diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c
index 98ede06..ad028e3 100644
--- a/src/arch/x86/lib/cpu.c
+++ b/src/arch/x86/lib/cpu.c
@@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu)
cpu->ops = driver ? driver->ops : NULL;
}
-void cpu_initialize(void)
+void cpu_initialize(struct cpu_info *info)
{
/* Because we busy wait at the printk spinlock.
* It is important to keep the number of printed messages
@@ -242,12 +242,9 @@ void cpu_initialize(void)
* disabled.
*/
struct device *cpu;
- struct cpu_info *info;
struct cpuinfo_x86 c;
- info = cpu_info();
-
- printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index);
+ printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu;
if (!cpu) {
@@ -289,3 +286,26 @@ void cpu_initialize(void)
return;
}
+/*
+ * AP-side half of the work mechanism.
+ *
+ * Spins until a non-zero function pointer appears in info->work, calls it
+ * once with info->params[0..2], stores its return value in info->result and
+ * then writes zero to info->work to signal completion. Handles exactly one
+ * work item per call.
+ *
+ * info: the calling CPU's cpu_info; its work/params/result fields are
+ *       shared with the CPU that posts the work.
+ */
+void cpu_work(struct cpu_info *info)
+{
+	workfunc f;
+	volatile workfunc *ptr = &info->work;
+	volatile u32 *params = info->params;
+	volatile u32 *result = &info->result;
+
+	printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index);
+
+	while (!*ptr)
+		;
+	f = *ptr;
+
+	printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f);
+	/*
+	 * Publish the return value before clearing 'work': wait_work() reads
+	 * info->result as soon as it sees 'work' go back to zero.
+	 */
+	*result = f(params[0], params[1], params[2]);
+
+	printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr);
+	*ptr = 0;
+
+	printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index);
+}
+
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 2ac9093..108bb66 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -17,6 +17,21 @@
#include <cpu/intel/speedstep.h>
#if CONFIG_SMP == 1
+
+/* we do not want this struct visible outside this file.
+ * The external interface is via functions.
+ */
+
+/*
+ * Per-core block: a stack followed by that core's cpu_info.
+ *
+ * NOTE(review): secondary_cpu_init() takes its cpu_info pointer from the
+ * address just past its stack argument, which appears to rely on 'info'
+ * sitting directly after 'stack' -- confirm before reordering members.
+ *
+ * NOTE(review): the array length subtracts a byte count (sizeof) from an
+ * element count (1024 u32s). If a 4 KiB block was intended this should
+ * probably be 1024 - sizeof(struct cpu_info) / sizeof(u32) -- confirm.
+ */
+struct apcore {
+	u32 stack[1024 - sizeof(struct cpu_info)];
+	struct cpu_info info;
+};
+
+/* Address of the last u32 in core x's stack array. */
+#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[(x)].stack) - 1])
+typedef struct apcore apcore_t;
+
+/* One block per possible CPU, indexed by the cpu index start_cpu() assigns. */
+apcore_t apcores[CONFIG_MAX_CPUS];
+
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
@@ -227,7 +242,6 @@ volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
- extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
@@ -244,16 +258,17 @@ int start_cpu(device_t cpu)
index = ++last_cpu_index;
/* Find end of the new processors stack */
- stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info);
+ stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */
- info = (struct cpu_info *)stack_end;
+ info = &apcores[index].info;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
-
+ printk(BIOS_SPEW, "start_cpu CPU %ld secondary_stack %#lx info %p\n",
+ index, secondary_stack, info);
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
@@ -381,8 +396,11 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat
#endif
/* C entry point of secondary cpus */
-void secondary_cpu_init(void)
-{
+void secondary_cpu_init(u32 infoptr)
+ {
+ /* note: we tried (((u32 *)&infoptr)[1]) but that failed. Compiler? */
+ struct cpu_info *info = (struct cpu_info *)(((u8*)&infoptr)+sizeof(u32));
+
atomic_inc(&active_cpus);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_lock(&start_cpu_lock);
@@ -398,11 +416,11 @@ void secondary_cpu_init(void)
cr4_val |= (1 << 9 | 1 << 10);
writecr4(cr4_val);
#endif
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_unlock(&start_cpu_lock);
#endif
-
+ cpu_work(info);
atomic_dec(&active_cpus);
stop_this_cpu();
@@ -505,6 +523,7 @@ void initialize_cpus(struct bus *cpu_bus)
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
#if CONFIG_SMP == 1
+	memset(apcores, 0, sizeof(apcores));	/* zero every per-core slot; sizeof(*apcores) covered only apcores[0] */
copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
#endif
@@ -522,7 +541,7 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
/* Initialize the bootstrap processor */
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SMP == 1
#if CONFIG_SERIAL_CPU_INIT == 1
@@ -534,3 +553,74 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
}
+#if CONFIG_SMP == 1
+/* work primitives */
+/*
+ * Hand a function and its three arguments to an idle core.
+ *
+ * The arguments are stored in the core's info->params[] and then 'f' is
+ * written to info->work; the core's cpu_work() loop runs f(a, b, c) once it
+ * sees 'work' become non-zero. This call does not wait for completion.
+ *
+ * core: index into apcores[]; must be below CONFIG_MAX_CPUS.
+ * f:    function for the core to run.
+ * a, b, c: arguments passed to f.
+ *
+ * Returns 0 if the work was posted, -1 if 'core' is out of range, not
+ * initialized, or still has work pending.
+ */
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c)
+{
+	struct cpu_info *info;
+	volatile workfunc *ptr;
+	volatile u32 *params;
+
+	/* apcores[] has CONFIG_MAX_CPUS entries, so that index is already out of range. */
+	if (core >= CONFIG_MAX_CPUS) {
+		printk(BIOS_EMERG, "start_work: invalid core %u\n", core);
+		return -1;
+	}
+	info = &apcores[core].info;
+	/* Guard against a slot whose device pointer was never set. */
+	if (!info->cpu || !info->cpu->initialized) {
+		printk(BIOS_EMERG, "start_work: core not initialized %u\n", core);
+		return -1;
+	}
+	ptr = &info->work;
+	if (*ptr) {
+		printk(BIOS_EMERG, "start_work: core is busy %u\n", core);
+		return -1;
+	}
+	/* Arguments first: the core may start as soon as 'work' is written. */
+	params = info->params;
+	params[0] = a;
+	params[1] = b;
+	params[2] = c;
+	printk(BIOS_INFO, "BSP starts work on core %u\n", core);
+	*ptr = f;
+	return 0;
+}
+
+/*
+ * Poll until the work previously handed to 'core' has finished.
+ *
+ * Completion is signalled by the core's info->work slot reading as zero, so
+ * this spins on that slot for at most 'maxwait' polls. 'maxwait' is a loop
+ * count, not a calibrated time.
+ *
+ * core:    index into apcores[]; must be below CONFIG_MAX_CPUS.
+ * retval:  if non-NULL, receives info->result once the slot has cleared.
+ * maxwait: maximum number of polls before giving up.
+ *
+ * Returns 0 once the slot has cleared, -1 for an invalid or uninitialized
+ * core, or when the core is still busy after 'maxwait' polls.
+ */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait)
+{
+	unsigned int i;
+	u32 value;
+	struct cpu_info *info;
+	volatile workfunc *ptr;
+	volatile u32 *result;
+
+	/* apcores[] has CONFIG_MAX_CPUS entries, so that index is already out of range. */
+	if (core >= CONFIG_MAX_CPUS) {
+		printk(BIOS_EMERG, "wait_work: invalid core %u\n", core);
+		return -1;
+	}
+	info = &apcores[core].info;
+	/* Guard against a slot whose device pointer was never set. */
+	if (!info->cpu || !info->cpu->initialized) {
+		printk(BIOS_EMERG, "wait_work: core not initialized %u\n", core);
+		return -1;
+	}
+	ptr = &info->work;
+	/* i counts completed polls, so it is exact when reported below. */
+	for (i = 0; i < maxwait && *ptr; i++) {
+		if ((i + 1) % 100000 == 0)
+			printk(BIOS_SPEW, "still waiting on %p at %u\n",
+			       ptr, i + 1);
+	}
+	if (*ptr) {
+		printk(BIOS_INFO, "core %u still running after %u iterations\n",
+		       core, i);
+		return -1;
+	}
+	result = &info->result;
+	value = *result;
+	printk(BIOS_INFO, "wait_work: core %u result is %#lx\n", core,
+	       (unsigned long)value);
+	if (retval)
+		*retval = value;
+	return 0;
+}
+
+/*
+ * Synchronous convenience wrapper: hand 'f' to 'core' with start_work() and,
+ * if that succeeded, poll for completion with wait_work().
+ *
+ * Returns -1 if the work could not be started, otherwise whatever
+ * wait_work() returns. 'retval' and 'timeout' are passed straight through
+ * to wait_work().
+ */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+	     unsigned int timeout)
+{
+	int started = start_work(core, f, a, b, c);
+
+	if (started != 0)
+		return -1;
+
+	return wait_work(core, retval, timeout);
+}
+#endif /* CONFIG_SMP == 1 */
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h
index cca2be1..39f2fa2 100644
--- a/src/include/cpu/cpu.h
+++ b/src/include/cpu/cpu.h
@@ -5,9 +5,15 @@ struct device;
struct bus;
#include <arch/cpu.h>
-void cpu_initialize(void);
+void cpu_initialize(struct cpu_info *info);
void initialize_cpus(struct bus *cpu_bus);
-void secondary_cpu_init(void);
+void secondary_cpu_init(u32 unused);
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c); /* post f(a, b, c) to a core; 0 on success, -1 if refused */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait); /* poll up to maxwait iterations for completion; 0 done, -1 otherwise */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout); /* start_work() followed by wait_work() */
+void cpu_work(struct cpu_info *info); /* AP side: spin on info->work, run it once, then clear it */
+
#if !CONFIG_WAIT_BEFORE_CPUS_INIT
#define cpus_ready_for_init() do {} while(0)
Ronald G. Minnich (rminnich(a)gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/877
-gerrit
commit e3f939c617fd9eced19ccc2f8bfd6f9012c232f3
Author: Ron Minnich <rminnich(a)gmail.com>
Date: Thu Apr 5 23:51:18 2012 -0700
Add multicore support to coreboot
This set of changes adds multicore support to coreboot.
To give people a chance to examine the structure, and to make bisecting
for bugs easier, we are implementing the patch in 3 stages. This first stage
introduces the basic mechanism but does not make any visible change in
behavior. The APs, instead of being immediately spun down, are asked to
run one debug print function and then spun down.
The means by which APs are tasked was implemented in the NIX operating
system in 2011, see: http://code.google.com/p/nix-os/
The APs come alive and spin on a memory location, contained
in a per-AP structure. Tasking is accomplished
by setting parameters into an argument array and then writing a function
pointer into the memory location. The AP indicates completion by writing
zero to the memory location. The BSP can wait for the AP to finish
(synchronous) or go off to do other work (asynchronous).
This way of tasking APs is incredibly cheap and fast: in the minimal
case, one memory write suffices to launch an AP into doing work.
This code has been tested on a sandybridge system (chromebook)
and on another 4-core sandybridge system.
Change-Id: I19f8587562fd499e98457aa9f11b52400c105697
Signed-off-by: Ron Minnich <rminnich(a)gmail.com>
---
src/arch/x86/include/arch/cpu.h | 5 ++
src/arch/x86/lib/cpu.c | 30 ++++++++--
src/cpu/x86/lapic/lapic_cpu_init.c | 108 +++++++++++++++++++++++++++++++++---
src/include/cpu/cpu.h | 10 +++-
4 files changed, 137 insertions(+), 16 deletions(-)
diff --git a/src/arch/x86/include/arch/cpu.h b/src/arch/x86/include/arch/cpu.h
index 604abde..34d0856 100644
--- a/src/arch/x86/include/arch/cpu.h
+++ b/src/arch/x86/include/arch/cpu.h
@@ -157,9 +157,14 @@ struct cpu_driver {
struct device;
struct cpu_driver *find_cpu_driver(struct device *cpu);
+typedef u32 (*workfunc)(u32, u32, u32); /* work item: takes three u32 arguments, returns a u32 */
+
struct cpu_info {
device_t cpu;
unsigned long index;
+ workfunc work; /* mailbox: non-zero is the function the AP should run; cpu_work() writes 0 when done */
+ u32 params[3]; /* arguments for 'work', filled in by start_work() before 'work' is set */
+ u32 result; /* value wait_work() hands back through its retval pointer */
};
static inline struct cpu_info *cpu_info(void)
diff --git a/src/arch/x86/lib/cpu.c b/src/arch/x86/lib/cpu.c
index 98ede06..ad028e3 100644
--- a/src/arch/x86/lib/cpu.c
+++ b/src/arch/x86/lib/cpu.c
@@ -234,7 +234,7 @@ static void set_cpu_ops(struct device *cpu)
cpu->ops = driver ? driver->ops : NULL;
}
-void cpu_initialize(void)
+void cpu_initialize(struct cpu_info *info)
{
/* Because we busy wait at the printk spinlock.
* It is important to keep the number of printed messages
@@ -242,12 +242,9 @@ void cpu_initialize(void)
* disabled.
*/
struct device *cpu;
- struct cpu_info *info;
struct cpuinfo_x86 c;
- info = cpu_info();
-
- printk(BIOS_INFO, "Initializing CPU #%ld\n", info->index);
+ printk(BIOS_INFO, "cpu_initialize: CPU #%ld\n", info->index);
cpu = info->cpu;
if (!cpu) {
@@ -289,3 +286,26 @@ void cpu_initialize(void)
return;
}
+/*
+ * AP-side half of the work mechanism.
+ *
+ * Spins until a non-zero function pointer appears in info->work, calls it
+ * once with info->params[0..2], stores its return value in info->result and
+ * then writes zero to info->work to signal completion. Handles exactly one
+ * work item per call.
+ *
+ * info: the calling CPU's cpu_info; its work/params/result fields are
+ *       shared with the CPU that posts the work.
+ */
+void cpu_work(struct cpu_info *info)
+{
+	workfunc f;
+	volatile workfunc *ptr = &info->work;
+	volatile u32 *params = info->params;
+	volatile u32 *result = &info->result;
+
+	printk(BIOS_INFO, "CPU #%ld ready to work\n", info->index);
+
+	while (!*ptr)
+		;
+	f = *ptr;
+
+	printk(BIOS_SPEW, "CPU #%ld is asked to do %p\n", info->index, f);
+	/*
+	 * Publish the return value before clearing 'work': wait_work() reads
+	 * info->result as soon as it sees 'work' go back to zero.
+	 */
+	*result = f(params[0], params[1], params[2]);
+
+	printk(BIOS_SPEW, "CPU #%ld finishes %p, mark %p\n", info->index, f, ptr);
+	*ptr = 0;
+
+	printk(BIOS_INFO, "CPU #%ld leaving cpu_work()\n", info->index);
+}
+
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index ed9940c..a1a2097 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -16,6 +16,21 @@
#include <cpu/cpu.h>
#if CONFIG_SMP == 1
+
+/* we do not want this struct visible outside this file.
+ * The external interface is via functions.
+ */
+
+/*
+ * Per-core block: a stack followed by that core's cpu_info.
+ *
+ * NOTE(review): secondary_cpu_init() takes its cpu_info pointer from the
+ * address just past its stack argument, which appears to rely on 'info'
+ * sitting directly after 'stack' -- confirm before reordering members.
+ *
+ * NOTE(review): the array length subtracts a byte count (sizeof) from an
+ * element count (1024 u32s). If a 4 KiB block was intended this should
+ * probably be 1024 - sizeof(struct cpu_info) / sizeof(u32) -- confirm.
+ */
+struct apcore {
+	u32 stack[1024 - sizeof(struct cpu_info)];
+	struct cpu_info info;
+};
+
+/* Address of the last u32 in core x's stack array. */
+#define TOS(x) (&apcores[(x)].stack[ARRAY_SIZE(apcores[(x)].stack) - 1])
+typedef struct apcore apcore_t;
+
+/* One block per possible CPU, indexed by the cpu index start_cpu() assigns. */
+apcore_t apcores[CONFIG_MAX_CPUS];
+
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
@@ -226,7 +241,6 @@ volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
- extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
@@ -243,16 +257,17 @@ int start_cpu(device_t cpu)
index = ++last_cpu_index;
/* Find end of the new processors stack */
- stack_end = ((unsigned long)_estack) - (CONFIG_STACK_SIZE*index) - sizeof(struct cpu_info);
+ stack_end = (unsigned long) TOS(index);
/* Record the index and which cpu structure we are using */
- info = (struct cpu_info *)stack_end;
+ info = &apcores[index].info;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
-
+ printk(BIOS_SPEW, "start_cpu CPU %ld secondary_stack %#lx info %p\n",
+ index, secondary_stack, info);
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
@@ -380,8 +395,11 @@ static __inline__ __attribute__((always_inline)) void writecr4(unsigned long Dat
#endif
/* C entry point of secondary cpus */
-void secondary_cpu_init(void)
-{
+void secondary_cpu_init(u32 infoptr)
+ {
+ /* note: we tried (((u32 *)&infoptr)[1]) but that failed. Compiler? */
+ struct cpu_info *info = (struct cpu_info *)(((u8*)&infoptr)+sizeof(u32));
+
atomic_inc(&active_cpus);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_lock(&start_cpu_lock);
@@ -397,11 +415,11 @@ void secondary_cpu_init(void)
cr4_val |= (1 << 9 | 1 << 10);
writecr4(cr4_val);
#endif
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SERIAL_CPU_INIT == 1
spin_unlock(&start_cpu_lock);
#endif
-
+ cpu_work(info);
atomic_dec(&active_cpus);
stop_this_cpu();
@@ -498,6 +516,7 @@ void initialize_cpus(struct bus *cpu_bus)
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
#if CONFIG_SMP == 1
+	memset(apcores, 0, sizeof(apcores));	/* zero every per-core slot; sizeof(*apcores) covered only apcores[0] */
copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
#endif
@@ -515,7 +534,7 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
/* Initialize the bootstrap processor */
- cpu_initialize();
+ cpu_initialize(info);
#if CONFIG_SMP == 1
#if CONFIG_SERIAL_CPU_INIT == 1
@@ -527,3 +546,74 @@ void initialize_cpus(struct bus *cpu_bus)
#endif
}
+#if CONFIG_SMP == 1
+/* work primitives */
+/*
+ * Hand a function and its three arguments to an idle core.
+ *
+ * The arguments are stored in the core's info->params[] and then 'f' is
+ * written to info->work; the core's cpu_work() loop runs f(a, b, c) once it
+ * sees 'work' become non-zero. This call does not wait for completion.
+ *
+ * core: index into apcores[]; must be below CONFIG_MAX_CPUS.
+ * f:    function for the core to run.
+ * a, b, c: arguments passed to f.
+ *
+ * Returns 0 if the work was posted, -1 if 'core' is out of range, not
+ * initialized, or still has work pending.
+ */
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c)
+{
+	struct cpu_info *info;
+	volatile workfunc *ptr;
+	volatile u32 *params;
+
+	/* apcores[] has CONFIG_MAX_CPUS entries, so that index is already out of range. */
+	if (core >= CONFIG_MAX_CPUS) {
+		printk(BIOS_EMERG, "start_work: invalid core %u\n", core);
+		return -1;
+	}
+	info = &apcores[core].info;
+	/* Guard against a slot whose device pointer was never set. */
+	if (!info->cpu || !info->cpu->initialized) {
+		printk(BIOS_EMERG, "start_work: core not initialized %u\n", core);
+		return -1;
+	}
+	ptr = &info->work;
+	if (*ptr) {
+		printk(BIOS_EMERG, "start_work: core is busy %u\n", core);
+		return -1;
+	}
+	/* Arguments first: the core may start as soon as 'work' is written. */
+	params = info->params;
+	params[0] = a;
+	params[1] = b;
+	params[2] = c;
+	printk(BIOS_INFO, "BSP starts work on core %u\n", core);
+	*ptr = f;
+	return 0;
+}
+
+/*
+ * Poll until the work previously handed to 'core' has finished.
+ *
+ * Completion is signalled by the core's info->work slot reading as zero, so
+ * this spins on that slot for at most 'maxwait' polls. 'maxwait' is a loop
+ * count, not a calibrated time.
+ *
+ * core:    index into apcores[]; must be below CONFIG_MAX_CPUS.
+ * retval:  if non-NULL, receives info->result once the slot has cleared.
+ * maxwait: maximum number of polls before giving up.
+ *
+ * Returns 0 once the slot has cleared, -1 for an invalid or uninitialized
+ * core, or when the core is still busy after 'maxwait' polls.
+ */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait)
+{
+	unsigned int i;
+	u32 value;
+	struct cpu_info *info;
+	volatile workfunc *ptr;
+	volatile u32 *result;
+
+	/* apcores[] has CONFIG_MAX_CPUS entries, so that index is already out of range. */
+	if (core >= CONFIG_MAX_CPUS) {
+		printk(BIOS_EMERG, "wait_work: invalid core %u\n", core);
+		return -1;
+	}
+	info = &apcores[core].info;
+	/* Guard against a slot whose device pointer was never set. */
+	if (!info->cpu || !info->cpu->initialized) {
+		printk(BIOS_EMERG, "wait_work: core not initialized %u\n", core);
+		return -1;
+	}
+	ptr = &info->work;
+	/* i counts completed polls, so it is exact when reported below. */
+	for (i = 0; i < maxwait && *ptr; i++) {
+		if ((i + 1) % 100000 == 0)
+			printk(BIOS_SPEW, "still waiting on %p at %u\n",
+			       ptr, i + 1);
+	}
+	if (*ptr) {
+		printk(BIOS_INFO, "core %u still running after %u iterations\n",
+		       core, i);
+		return -1;
+	}
+	result = &info->result;
+	value = *result;
+	printk(BIOS_INFO, "wait_work: core %u result is %#lx\n", core,
+	       (unsigned long)value);
+	if (retval)
+		*retval = value;
+	return 0;
+}
+
+/*
+ * Synchronous convenience wrapper: hand 'f' to 'core' with start_work() and,
+ * if that succeeded, poll for completion with wait_work().
+ *
+ * Returns -1 if the work could not be started, otherwise whatever
+ * wait_work() returns. 'retval' and 'timeout' are passed straight through
+ * to wait_work().
+ */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+	     unsigned int timeout)
+{
+	int started = start_work(core, f, a, b, c);
+
+	if (started != 0)
+		return -1;
+
+	return wait_work(core, retval, timeout);
+}
+#endif /* CONFIG_SMP == 1 */
diff --git a/src/include/cpu/cpu.h b/src/include/cpu/cpu.h
index cca2be1..39f2fa2 100644
--- a/src/include/cpu/cpu.h
+++ b/src/include/cpu/cpu.h
@@ -5,9 +5,15 @@ struct device;
struct bus;
#include <arch/cpu.h>
-void cpu_initialize(void);
+void cpu_initialize(struct cpu_info *info);
void initialize_cpus(struct bus *cpu_bus);
-void secondary_cpu_init(void);
+void secondary_cpu_init(u32 unused);
+int start_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c); /* post f(a, b, c) to a core; 0 on success, -1 if refused */
+int wait_work(unsigned int core, u32 *retval, unsigned int maxwait); /* poll up to maxwait iterations for completion; 0 done, -1 otherwise */
+int run_work(unsigned int core, workfunc f, u32 a, u32 b, u32 c, u32 *retval,
+ unsigned int timeout); /* start_work() followed by wait_work() */
+void cpu_work(struct cpu_info *info); /* AP side: spin on info->work, run it once, then clear it */
+
#if !CONFIG_WAIT_BEFORE_CPUS_INIT
#define cpus_ready_for_init() do {} while(0)