From: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com
Currently, an NMI is blindly sent to all the vCPUs when an NMI button event happens. This doesn't properly emulate real hardware, on which an NMI button event triggers LINT1. Because of this, the NMI is sent to the processor even when LINT1 is masked in the LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
With this patch, inject-nmi request is handled as follows.
- When the in-kernel irqchip is disabled, inject LINT1 instead of an NMI interrupt.
- When the in-kernel irqchip is enabled, send the NMI event to the kernel as the current code does. LINT1 should be emulated in the kernel.
Signed-off-by: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com Tested-by: Lai Jiangshan laijs@cn.fujitsu.com --- hw/apic.c | 16 ++++++++++++++++ hw/apic.h | 1 + monitor.c | 5 ++--- 3 files changed, 19 insertions(+), 3 deletions(-)
Index: qemu-kvm/hw/apic.c =================================================================== --- qemu-kvm.orig/hw/apic.c +++ qemu-kvm/hw/apic.c @@ -205,6 +205,22 @@ void apic_deliver_pic_intr(DeviceState * } }
+void apic_deliver_nmi(CPUState *env) +{ + APICState *apic; + + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + cpu_interrupt(env, CPU_INTERRUPT_NMI); + return; + } + + apic = DO_UPCAST(APICState, busdev.qdev, env->apic_state); + if (!apic) + cpu_interrupt(env, CPU_INTERRUPT_NMI); + else + apic_local_deliver(apic, APIC_LVT_LINT1); +} + #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j, __mask;\ Index: qemu-kvm/hw/apic.h =================================================================== --- qemu-kvm.orig/hw/apic.h +++ qemu-kvm/hw/apic.h @@ -10,6 +10,7 @@ void apic_deliver_irq(uint8_t dest, uint uint8_t trigger_mode); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); +void apic_deliver_nmi(CPUState *env); int apic_get_interrupt(DeviceState *s); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); Index: qemu-kvm/monitor.c =================================================================== --- qemu-kvm.orig/monitor.c +++ qemu-kvm/monitor.c @@ -2615,9 +2615,8 @@ static int do_inject_nmi(Monitor *mon, c { CPUState *env;
- for (env = first_cpu; env != NULL; env = env->next_cpu) { - cpu_interrupt(env, CPU_INTERRUPT_NMI); - } + for (env = first_cpu; env != NULL; env = env->next_cpu) + apic_deliver_nmi(env);
return 0; }
On 2011-10-10 08:06, Lai Jiangshan wrote:
From: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com
Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is maskied in LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
With this patch, inject-nmi request is handled as follows.
- When in-kernel irqchip is disabled, inject LINT1 instead of NMI interrupt.
- When in-kernel irqchip is enabled, send nmi event to kernel as the current code does. LINT1 should be emulated in kernel.
Signed-off-by: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com Tested-by: Lai Jiangshan laijs@cn.fujitsu.com
This is targeting uq/master?
Please make sure your patch passes checkpatch.pl
hw/apic.c | 16 ++++++++++++++++ hw/apic.h | 1 + monitor.c | 5 ++--- 3 files changed, 19 insertions(+), 3 deletions(-)
Index: qemu-kvm/hw/apic.c
--- qemu-kvm.orig/hw/apic.c +++ qemu-kvm/hw/apic.c @@ -205,6 +205,22 @@ void apic_deliver_pic_intr(DeviceState * } }
+void apic_deliver_nmi(CPUState *env) +{
- APICState *apic;
- if (kvm_enabled() && kvm_irqchip_in_kernel()) {
cpu_interrupt(env, CPU_INTERRUPT_NMI);
- return;
- }
- apic = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
- if (!apic)
cpu_interrupt(env, CPU_INTERRUPT_NMI);
Testing for !apic and handling the non-APIC case here looks a bit strange. Let's move the !env->apic_state test to the caller to make it consistent with other APIC services.
The KVM case should be a separate qemu-kvm patch on top for now. (We may implement calls into APIC models differently when pushing in-kernel irqchip support upstream.)
Jan
Am 10.10.2011 08:49, schrieb Jan Kiszka:
On 2011-10-10 08:06, Lai Jiangshan wrote:
From: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com
Currently, NMI interrupt is blindly sent to all the vCPUs when NMI button event happens. This doesn't properly emulate real hardware on which NMI button event triggers LINT1. Because of this, NMI is sent to the processor even when LINT1 is maskied in LVT. For example, this
[...]
This is targeting uq/master?
Please make sure your patch passes checkpatch.pl
While at it: masked?
Andreas
Synchronize with the newest kernel headers, which define KVM_CAP_LAPIC_NMI, by running ./scripts/update-linux-headers.sh
Signed-off-by: Lai Jiangshan laijs@cn.fujitsu.com --- linux-headers/asm | 1 + linux-headers/asm-powerpc/kvm.h | 19 +++++++++++++++++-- linux-headers/asm-x86/kvm_para.h | 14 ++++++++++++++ linux-headers/linux/kvm.h | 24 +++++++++++++++++------- linux-headers/linux/kvm_para.h | 1 + 5 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/linux-headers/asm b/linux-headers/asm new file mode 120000 index 0000000..b8baa31 --- /dev/null +++ b/linux-headers/asm @@ -0,0 +1 @@ +/home/laijs/work/qemu-kvm/linux-headers/asm-x86 \ No newline at end of file diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 777d307..a4f6c85 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -22,6 +22,10 @@
#include <linux/types.h>
+/* Select powerpc specific features in <linux/kvm.h> */ +#define __KVM_HAVE_SPAPR_TCE +#define __KVM_HAVE_PPC_SMT + struct kvm_regs { __u64 pc; __u64 cr; @@ -166,8 +170,8 @@ struct kvm_sregs { } ppc64; struct { __u32 sr[16]; - __u64 ibat[8]; - __u64 dbat[8]; + __u64 ibat[8]; + __u64 dbat[8]; } ppc32; } s; struct { @@ -272,4 +276,15 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_UNSET -2U #define KVM_INTERRUPT_SET_LEVEL -3U
+/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + +/* for KVM_ALLOCATE_RMA */ +struct kvm_allocate_rma { + __u64 rma_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index 834d71e..f2ac46a 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -21,6 +21,7 @@ */ #define KVM_FEATURE_CLOCKSOURCE2 3 #define KVM_FEATURE_ASYNC_PF 4 +#define KVM_FEATURE_STEAL_TIME 5
/* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -30,10 +31,23 @@ #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12
+#define KVM_MSR_ENABLED 1 /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 +#define MSR_KVM_STEAL_TIME 0x4b564d03 + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + +#define KVM_STEAL_ALIGNMENT_BITS 5 +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
#define KVM_MAX_MMU_OP_BATCH 32
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index fc63b73..f0fdaa9 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -161,6 +161,7 @@ struct kvm_pit_config { #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19
/* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -264,6 +265,11 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; /* Fix the size of the union. */ char padding[256]; }; @@ -544,6 +550,13 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 #define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_S390_GMAP 71 +#ifdef KVM_CAP_USER_NMI +#define KVM_CAP_LAPIC_NMI 72 +#endif
#ifdef KVM_CAP_IRQ_ROUTING
@@ -746,6 +759,9 @@ struct kvm_clock_data { /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
@@ -773,20 +789,14 @@ struct kvm_assigned_pci_dev {
struct kvm_assigned_irq { __u32 assigned_dev_id; - __u32 host_irq; + __u32 host_irq; /* ignored (legacy field) */ __u32 guest_irq; __u32 flags; union { - struct { - __u32 addr_lo; - __u32 addr_hi; - __u32 data; - } guest_msi; __u32 reserved[12]; }; };
- struct kvm_assigned_msix_nr { __u32 assigned_dev_id; __u16 entry_nr; diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index 7bdcf93..b315e27 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -26,3 +26,4 @@ #include <asm/kvm_para.h>
#endif /* __LINUX_KVM_PARA_H */ +
From: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com
Currently, an NMI is blindly sent to all the vCPUs when an NMI button event happens. This doesn't properly emulate real hardware, on which an NMI button event triggers LINT1. Because of this, the NMI is sent to the processor even when LINT1 is masked in the LVT. For example, this causes the problem that kdump initiated by NMI sometimes doesn't work on KVM, because kdump assumes NMI is masked on CPUs other than CPU0.
With this patch, inject-nmi request is handled as follows.
- When the in-kernel irqchip is disabled, inject LINT1 instead of an NMI interrupt.
- When the in-kernel irqchip is enabled, send the NMI event to the kernel as the current code does. LINT1 should be emulated in the kernel.
Changes from v1 (laijs): use KVM_CAP_LAPIC_NMI; adjust the apic_deliver_nmi() API.
Signed-off-by: Kenji Kaneshige kaneshige.kenji@jp.fujitsu.com Tested-by: Lai Jiangshan laijs@cn.fujitsu.com --- hw/apic.c | 26 ++++++++++++++++++++++++++ hw/apic.h | 1 + monitor.c | 6 +++++- 3 files changed, 32 insertions(+), 1 deletions(-)
diff --git a/hw/apic.c b/hw/apic.c index 69d6ac5..76e8208 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -205,6 +205,32 @@ void apic_deliver_pic_intr(DeviceState *d, int level) } }
+void apic_deliver_nmi(DeviceState *d) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, d); + +#ifdef KVM_CAP_LAPIC_NMI + static int kernel_lapic_nmi; + + if (kernel_lapic_nmi == 0) { + if (!kvm_enabled() || !kvm_irqchip_in_kernel() || + !kvm_check_extension(kvm_state, KVM_CAP_LAPIC_NMI)) { + kernel_lapic_nmi = -1; + } else { + kernel_lapic_nmi = 1; + } + } +#else + int kernel_lapic_nmi = -1; +#endif + + if (kernel_lapic_nmi == 1) { + cpu_interrupt(s->cpu_env, CPU_INTERRUPT_NMI); + } else { + apic_local_deliver(s, APIC_LVT_LINT1); + } +} + #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j, __mask;\ diff --git a/hw/apic.h b/hw/apic.h index c857d52..3a4be0a 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -10,6 +10,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t trigger_mode); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); +void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); diff --git a/monitor.c b/monitor.c index cb485bf..0b81f17 100644 --- a/monitor.c +++ b/monitor.c @@ -2616,7 +2616,11 @@ static int do_inject_nmi(Monitor *mon, const QDict *qdict, QObject **ret_data) CPUState *env;
for (env = first_cpu; env != NULL; env = env->next_cpu) { - cpu_interrupt(env, CPU_INTERRUPT_NMI); + if (!env->apic_state) { + cpu_interrupt(env, CPU_INTERRUPT_NMI); + } else { + apic_deliver_nmi(env->apic_state); + } }
return 0;