This allows qemu to receive notifications from the guest OS on success or failure of a memory hotplug request. The guest OS needs to implement the _OST functionality for this to work (linux-next: http://lkml.org/lkml/2012/6/25/321)
This patch also updates dimm bitmap state and hot-remove pending flag on hot-remove failure. This allows failed hot operations to be retried at any time (only works for guests that use _OST notification). Also adds new _OST registers in docs/specs/acpi_hotplug.txt. --- docs/specs/acpi_hotplug.txt | 25 +++++++++++++++++++++++++ hw/acpi_ich9.c | 31 ++++++++++++++++++++++++++++--- hw/acpi_ich9.h | 3 +++ hw/acpi_piix4.c | 35 ++++++++++++++++++++++++++++++++--- hw/dimm.c | 28 +++++++++++++++++++++++++++- hw/dimm.h | 11 ++++++++++- 6 files changed, 125 insertions(+), 8 deletions(-)
diff --git a/docs/specs/acpi_hotplug.txt b/docs/specs/acpi_hotplug.txt index cf86242..536da16 100644 --- a/docs/specs/acpi_hotplug.txt +++ b/docs/specs/acpi_hotplug.txt @@ -20,3 +20,28 @@ ejected.
Written by ACPI memory device _EJ0 method to notify qemu of successfull hot-removal. Write-only. + +Memory Dimm ejection failure notification (IO port 0xafa1, 1-byte access): +--------------------------------------------------------------- +Dimm hot-remove _OST notification. Byte value indicates Dimm slot for which +ejection failed. + +Written by ACPI memory device _OST method to notify qemu of failed +hot-removal. Write-only. + +Memory Dimm insertion success notification (IO port 0xafa2, 1-byte access): +--------------------------------------------------------------- +Dimm hot-add _OST notification. Byte value indicates Dimm slot for which +insertion succeeded. + +Written by ACPI memory device _OST method to notify qemu of successful +hot-add. Write-only. + +Memory Dimm insertion failure notification (IO port 0xafa3, 1-byte access): +--------------------------------------------------------------- +Dimm hot-add _OST notification. Byte value indicates Dimm slot for which +insertion failed. + +Written by ACPI memory device _OST method to notify qemu of failed +hot-add. Write-only. 
+ diff --git a/hw/acpi_ich9.c b/hw/acpi_ich9.c index f5dc1c9..2705230 100644 --- a/hw/acpi_ich9.c +++ b/hw/acpi_ich9.c @@ -111,6 +111,15 @@ static void memhp_writeb(void *opaque, uint32_t addr, uint32_t val) case ICH9_MEM_EJ_BASE - ICH9_MEM_BASE: dimm_notify(val, DIMM_REMOVE_SUCCESS); break; + case ICH9_MEM_OST_REMOVE_FAIL - ICH9_MEM_BASE: + dimm_notify(val, DIMM_REMOVE_FAIL); + break; + case ICH9_MEM_OST_ADD_SUCCESS - ICH9_MEM_BASE: + dimm_notify(val, DIMM_ADD_SUCCESS); + break; + case ICH9_MEM_OST_ADD_FAIL - ICH9_MEM_BASE: + dimm_notify(val, DIMM_ADD_FAIL); + break; default: ICH9_DEBUG("memhp write invalid %x <== %d\n", addr, val); } @@ -125,7 +134,7 @@ static const MemoryRegionOps ich9_memhp_ops = { }, { .offset = ICH9_MEM_EJ_BASE - ICH9_MEM_BASE, - .len = 1, .size = 1, + .len = 4, .size = 1, .write = memhp_writeb, }, PORTIO_END_OF_LIST() @@ -274,6 +283,22 @@ static int ich9_dimm_hotplug(DeviceState *qdev, DimmDevice *dev, int return 0; }
+static int ich9_dimm_revert(DeviceState *qdev, DimmDevice *dev, int add) +{ + PCIDevice *pci_dev = DO_UPCAST(PCIDevice, qdev, qdev); + ICH9LPCState *s = DO_UPCAST(ICH9LPCState, d, pci_dev); + struct gpe_regs *g = &s->pm.gperegs; + DimmDevice *slot = DIMM(dev); + int idx = slot->idx; + + if (add) { + g->mems_sts[idx/8] &= ~(1 << (idx%8)); + } else { + g->mems_sts[idx/8] |= (1 << (idx%8)); + } + return 0; +} + void ich9_pm_init(void *device, qemu_irq sci_irq, qemu_irq cmos_s3) { ICH9LPCState *lpc = (ICH9LPCState *)device; @@ -296,10 +321,10 @@ void ich9_pm_init(void *device, qemu_irq sci_irq, qemu_irq cmos_s3) memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi);
memory_region_init_io(&pm->io_memhp, &ich9_memhp_ops, pm, "apci-memhp0", - DIMM_BITMAP_BYTES + 1); + DIMM_BITMAP_BYTES + 4); memory_region_add_subregion(get_system_io(), ICH9_MEM_BASE, &pm->io_memhp);
- dimm_bus_hotplug(ich9_dimm_hotplug, &lpc->d.qdev); + dimm_bus_hotplug(ich9_dimm_hotplug, ich9_dimm_revert, &lpc->d.qdev);
pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); diff --git a/hw/acpi_ich9.h b/hw/acpi_ich9.h index af61a2d..8f57cd8 100644 --- a/hw/acpi_ich9.h +++ b/hw/acpi_ich9.h @@ -26,6 +26,9 @@ #define ICH9_MEM_BASE 0xaf80 #define ICH9_MEM_EJ_BASE 0xafa0 #define ICH9_MEM_HOTPLUG_STATUS 8 +#define ICH9_MEM_OST_REMOVE_FAIL 0xafa1 +#define ICH9_MEM_OST_ADD_SUCCESS 0xafa2 +#define ICH9_MEM_OST_ADD_FAIL 0xafa3
typedef struct ICH9LPCPMRegs { /* diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 6e4718e..70aa480 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -51,6 +51,9 @@ #define PCI_RMV_BASE 0xae0c #define MEM_BASE 0xaf80 #define MEM_EJ_BASE 0xafa0 +#define MEM_OST_REMOVE_FAIL 0xafa1 +#define MEM_OST_ADD_SUCCESS 0xafa2 +#define MEM_OST_ADD_FAIL 0xafa3
#define PIIX4_MEM_HOTPLUG_STATUS 8 #define PIIX4_PCI_HOTPLUG_STATUS 2 @@ -90,6 +93,7 @@ typedef struct PIIX4PMState { uint8_t s4_val; } PIIX4PMState;
+static int piix4_dimm_revert(DeviceState *qdev, DimmDevice *dev, int add); static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
#define ACPI_ENABLE 0xf1 @@ -551,6 +555,15 @@ static void memhp_writeb(void *opaque, uint32_t addr, uint32_t val) case MEM_EJ_BASE - MEM_BASE: dimm_notify(val, DIMM_REMOVE_SUCCESS); break; + case MEM_OST_REMOVE_FAIL - MEM_BASE: + dimm_notify(val, DIMM_REMOVE_FAIL); + break; + case MEM_OST_ADD_SUCCESS - MEM_BASE: + dimm_notify(val, DIMM_ADD_SUCCESS); + break; + case MEM_OST_ADD_FAIL - MEM_BASE: + dimm_notify(val, DIMM_ADD_FAIL); + break; default: PIIX4_DPRINTF("memhp write invalid %x <== %d\n", addr, val); } @@ -564,7 +577,7 @@ static const MemoryRegionOps piix4_memhp_ops = { .read = memhp_readb, }, { - .offset = MEM_EJ_BASE - MEM_BASE, .len = 1, + .offset = MEM_EJ_BASE - MEM_BASE, .len = 4, .size = 1, .write = memhp_writeb, }, @@ -653,7 +666,7 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) memory_region_add_subregion(get_system_io(), PCI_HOTPLUG_ADDR, &s->io_pci); memory_region_init_io(&s->io_memhp, &piix4_memhp_ops, s, "apci-memhp0", - DIMM_BITMAP_BYTES + 1); + DIMM_BITMAP_BYTES + 4); memory_region_add_subregion(get_system_io(), MEM_BASE, &s->io_memhp);
for (i = 0; i < DIMM_BITMAP_BYTES; i++) { @@ -661,7 +674,7 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) }
pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev); - dimm_bus_hotplug(piix4_dimm_hotplug, &s->dev.qdev); + dimm_bus_hotplug(piix4_dimm_hotplug, piix4_dimm_revert, &s->dev.qdev); }
static void enable_device(PIIX4PMState *s, int slot) @@ -706,6 +719,22 @@ static int piix4_dimm_hotplug(DeviceState *qdev, DimmDevice *dev, int return 0; }
+static int piix4_dimm_revert(DeviceState *qdev, DimmDevice *dev, int add) +{ + PCIDevice *pci_dev = DO_UPCAST(PCIDevice, qdev, qdev); + PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, pci_dev); + struct gpe_regs *g = &s->gperegs; + DimmDevice *slot = DIMM(dev); + int idx = slot->idx; + + if (add) { + g->mems_sts[idx/8] &= ~(1 << (idx%8)); + } else { + g->mems_sts[idx/8] |= (1 << (idx%8)); + } + return 0; +} + static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, PCIHotplugState state) { diff --git a/hw/dimm.c b/hw/dimm.c index 4670ae6..69b97b6 100644 --- a/hw/dimm.c +++ b/hw/dimm.c @@ -149,7 +149,8 @@ void dimm_config_create(char *id, uint64_t size, const char *bus, uint64_t node, QTAILQ_INSERT_TAIL(&dimmconfig_list, dimm_cfg, nextdimmcfg); }
-void dimm_bus_hotplug(dimm_hotplug_fn hotplug, DeviceState *qdev) +void dimm_bus_hotplug(dimm_hotplug_fn hotplug, dimm_hotplug_fn revert, + DeviceState *qdev) { DimmBus *bus; QLIST_FOREACH(bus, &memory_buses, next) { @@ -157,6 +158,7 @@ void dimm_bus_hotplug(dimm_hotplug_fn hotplug, DeviceState *qdev) bus->qbus.allow_hotplug = 1; bus->dimm_hotplug_qdev = qdev; bus->dimm_hotplug = hotplug; + bus->dimm_revert = revert; } }
@@ -168,6 +170,7 @@ static void dimm_plug_device(DimmDevice *slot) if (bus->dimm_hotplug) { bus->dimm_hotplug(bus->dimm_hotplug_qdev, slot, 1); } + slot->pending = DIMM_ADD_PENDING; }
static int dimm_unplug_device(DeviceState *qdev) @@ -177,6 +180,7 @@ static int dimm_unplug_device(DeviceState *qdev) if (bus->dimm_hotplug) { bus->dimm_hotplug(bus->dimm_hotplug_qdev, DIMM(qdev), 0); } + DIMM(qdev)->pending = DIMM_REMOVE_PENDING; return 1; }
@@ -353,6 +357,7 @@ static int dimm_init(DeviceState *s) slot->start = slotcfg->start; slot->size = slotcfg->size; slot->node = slotcfg->node; + slot->pending = DIMM_NO_PENDING;
QTAILQ_INSERT_TAIL(&bus->dimmlist, slot, nextdimm); dimm_plug_device(slot); @@ -374,13 +379,36 @@ void dimm_notify(uint32_t idx, uint32_t event) result = g_malloc0(sizeof(*result)); slotcfg = dimmcfg_find_from_name(bus, slot->qdev.id); result->dimmname = slotcfg->name; + result->ret = event;
switch (event) { case DIMM_REMOVE_SUCCESS: + slot->pending = DIMM_NO_PENDING; qdev_unplug_complete((DeviceState *)slot, NULL); QTAILQ_REMOVE(&bus->dimmlist, slot, nextdimm); QTAILQ_INSERT_TAIL(&bus->dimm_hp_result_queue, result, next); break; + case DIMM_REMOVE_FAIL: + slot->pending = DIMM_NO_PENDING; + if (bus->dimm_revert) { + bus->dimm_revert(bus->dimm_hotplug_qdev, slot, 0); + } + QTAILQ_INSERT_TAIL(&bus->dimm_hp_result_queue, result, next); + break; + case DIMM_ADD_SUCCESS: + slot->pending = DIMM_NO_PENDING; + QTAILQ_INSERT_TAIL(&bus->dimm_hp_result_queue, result, next); + break; + case DIMM_ADD_FAIL: + slot->pending = DIMM_NO_PENDING; + if (bus->dimm_revert) { + bus->dimm_revert(bus->dimm_hotplug_qdev, slot, 1); + } + qdev_unplug_complete((DeviceState *)slot, NULL); + QTAILQ_REMOVE(&bus->dimmlist, slot, nextdimm); + QTAILQ_INSERT_TAIL(&bus->dimm_hp_result_queue, result, next); + /* result is owned by the queue now; don't fall into g_free() */ + break; default: g_free(result); break; diff --git a/hw/dimm.h b/hw/dimm.h index 8f9546b..f43f745 100644 --- a/hw/dimm.h +++ b/hw/dimm.h @@ -18,6 +18,12 @@ typedef enum { DIMM_ADD_FAIL = 3 } dimm_hp_result_code;
+typedef enum { + DIMM_NO_PENDING = 0, + DIMM_ADD_PENDING = 1, + DIMM_REMOVE_PENDING = 2, +} dimm_hp_pending_code; + #define TYPE_DIMM "dimm" #define DIMM(obj) \ OBJECT_CHECK(DimmDevice, (obj), TYPE_DIMM) @@ -43,6 +49,7 @@ struct DimmDevice { uint32_t node; /* numa node proximity */ uint32_t populated; /* 1 means device has been hotplugged. Default is 0. */ MemoryRegion *mr; /* MemoryRegion for this slot. !NULL only if populated */ + dimm_hp_pending_code pending; /* pending hot operation for this dimm */ QTAILQ_ENTRY(DimmDevice) nextdimm; };
@@ -68,6 +75,7 @@ typedef struct DimmBus { DeviceState *dimm_hotplug_qdev; dimm_hotplug_fn dimm_hotplug; DimmConfiglist dimmconfig_list; + dimm_hotplug_fn dimm_revert; QTAILQ_HEAD(Dimmlist, DimmDevice) dimmlist; QTAILQ_HEAD(dimm_hp_result_head, dimm_hp_result) dimm_hp_result_queue; QLIST_ENTRY(DimmBus) next; @@ -79,7 +87,8 @@ struct dimm_hp_result { QTAILQ_ENTRY(dimm_hp_result) next; };
-void dimm_bus_hotplug(dimm_hotplug_fn hotplug, DeviceState *qdev); +void dimm_bus_hotplug(dimm_hotplug_fn hotplug, dimm_hotplug_fn revert, + DeviceState *qdev); void dimm_setup_fwcfg_layout(uint64_t *fw_cfg_slots); int dimm_add(char *id); DimmBus *dimm_bus_create(Object *parent, const char *name, uint32_t max_dimms,