Merge git://github.com/agraf/linux-2.6.git kvm-ppc-next into queue

This commit is contained in:
Gleb Natapov 2013-04-28 12:50:07 +03:00
commit 064d1afaa5
53 changed files with 5549 additions and 449 deletions

View File

@ -1792,6 +1792,23 @@ registers, find a list below:
PPC | KVM_REG_PPC_TSR | 32
PPC | KVM_REG_PPC_OR_TSR | 32
PPC | KVM_REG_PPC_CLEAR_TSR | 32
PPC | KVM_REG_PPC_MAS0 | 32
PPC | KVM_REG_PPC_MAS1 | 32
PPC | KVM_REG_PPC_MAS2 | 64
PPC | KVM_REG_PPC_MAS7_3 | 64
PPC | KVM_REG_PPC_MAS4 | 32
PPC | KVM_REG_PPC_MAS6 | 32
PPC | KVM_REG_PPC_MMUCFG | 32
PPC | KVM_REG_PPC_TLB0CFG | 32
PPC | KVM_REG_PPC_TLB1CFG | 32
PPC | KVM_REG_PPC_TLB2CFG | 32
PPC | KVM_REG_PPC_TLB3CFG | 32
PPC | KVM_REG_PPC_TLB0PS | 32
PPC | KVM_REG_PPC_TLB1PS | 32
PPC | KVM_REG_PPC_TLB2PS | 32
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
@ -2173,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data
written, then `n_invalid' invalid entries, invalidating any previously
valid entries found.
4.79 KVM_CREATE_DEVICE
Capability: KVM_CAP_DEVICE_CTRL
Type: vm ioctl
Parameters: struct kvm_create_device (in/out)
Returns: 0 on success, -1 on error
Errors:
ENODEV: The device type is unknown or unsupported
EEXIST: Device already created, and this type of device may not
be instantiated multiple times
Other error conditions may be defined by individual device types or
have their standard meanings.
Creates an emulated device in the kernel. The file descriptor returned
in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
device type is supported (not necessarily whether it can be created
in the current vm).
Individual devices should not define flags. Attributes should be used
for specifying any behavior that is not implied by the device type
number.
struct kvm_create_device {
__u32 type; /* in: KVM_DEV_TYPE_xxx */
__u32 fd; /* out: device handle */
__u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
};
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
ENXIO: The group or attribute is unknown/unsupported for this device
EPERM: The attribute cannot (currently) be accessed this way
(e.g. read-only attribute, or attribute that only makes
sense when the device is in a different state)
Other error conditions may be defined by individual device types.
Gets/sets a specified piece of device configuration and/or state. The
semantics are device-specific. See individual device documentation in
the "devices" directory. As with ONE_REG, the size of the data
transferred is defined by the particular attribute.
struct kvm_device_attr {
__u32 flags; /* no flags currently defined */
__u32 group; /* device-defined */
__u64 attr; /* group-defined */
__u64 addr; /* userspace address of attr data */
};
4.81 KVM_HAS_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
ENXIO: The group or attribute is unknown/unsupported for this device
Tests whether a device supports a particular attribute. A successful
return indicates the attribute is implemented. It does not necessarily
indicate that the attribute can be read or written in the device's
current state. "addr" is ignored.
4.77 KVM_ARM_VCPU_INIT
@ -2255,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling
KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling
this ioctl twice for any of the base addresses will return -EEXIST.
4.82 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_rtas_token_args
Returns: 0 on success, -1 on error
Defines a token value for a RTAS (Run Time Abstraction Services)
service in order to allow it to be handled in the kernel. The
argument struct gives the name of the service, which must be the name
of a service that has a kernel-side implementation. If the token
value is non-zero, it will be associated with that service, and
subsequent RTAS calls by the guest specifying that token will be
handled by the kernel. If the token value is 0, then any token
associated with the service will be forgotten, and subsequent RTAS
calls by the guest for that service will be passed to userspace to be
handled.
5. The kvm_run structure
------------------------
@ -2658,3 +2764,11 @@ to receive the topmost interrupt vector.
When disabled (args[0] == 0), behavior is as if this facility is unsupported.
When this capability is enabled, KVM_EXIT_EPR can occur.
6.6 KVM_CAP_IRQ_MPIC
Architectures: ppc
Parameters: args[0] is the MPIC device fd
args[1] is the MPIC CPU number for this vcpu
This capability connects the vcpu to an in-kernel MPIC device.

View File

@ -0,0 +1 @@
This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.

View File

@ -0,0 +1,56 @@
MPIC interrupt controller
=========================
Device types supported:
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
Only one MPIC instance, of any type, may be instantiated. The created
MPIC will act as the system interrupt controller, connecting to each
vcpu's interrupt inputs.
Groups:
KVM_DEV_MPIC_GRP_MISC
Attributes:
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
Base address of the 256 KiB MPIC register space. Must be
naturally aligned. A value of zero disables the mapping.
Reset value is zero.
KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
Access an MPIC register, as if the access were made from the guest.
"attr" is the byte offset into the MPIC register space. Accesses
must be 4-byte aligned.
MSIs may be signaled by using this attribute group to write
to the relevant MSIIR.
KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
IRQ input line for each standard openpic source. 0 is inactive and 1
is active, regardless of interrupt sense.
For edge-triggered interrupts: Writing 1 is considered an activating
edge, and writing 0 is ignored. Reading returns 1 if a previously
signaled edge has not been acknowledged, and 0 otherwise.
"attr" is the IRQ number. IRQ numbers for standard sources are the
byte offset of the relevant IVPR from EIVPR0, divided by 32.
IRQ Routing:
The MPIC emulation supports IRQ routing. Only a single MPIC device can
be instantiated. Once that device has been created, it's available as
irqchip id 0.
This irqchip 0 has 256 interrupt pins, which expose the interrupts in
the main array of interrupt sources (a.k.a. "SRC" interrupts).
The numbering is the same as the MPIC device tree binding -- based on
the register offset from the beginning of the sources array, without
regard to any subdivisions in chip documentation such as "internal"
or "external" interrupts.
Default routes are established for these pins, with the GSI being equal
to the pin number.
Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.

View File

@ -26,6 +26,7 @@
#define KVM_USER_MEM_SLOTS 32
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* define exit reasons from vmm to kvm*/
#define EXIT_REASON_VM_PANIC 0

View File

@ -27,6 +27,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select KVM_APIC_ARCHITECTURE
select KVM_MMIO
---help---

View File

@ -49,7 +49,7 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o assigned-dev.o)
coalesced_mmio.o irq_comm.o assigned-dev.o irqchip.o)
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)

View File

@ -270,6 +270,9 @@
#define H_SET_MODE 0x31C
#define MAX_HCALL_OPCODE H_SET_MODE
/* Platform specific hcalls, used by KVM */
#define H_RTAS 0xf000
#ifndef __ASSEMBLY__
/**

View File

@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void);
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec);
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
bool upper, u32 val);
@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
unsigned long pte_index);
extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
unsigned long gpa, bool dirty);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,

View File

@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}
#ifdef CONFIG_KVM_BOOK3S_64_HV
/*
* Note modification of an HPTE; set the HPTE modified bit
* if anyone is interested.
*/
static inline void note_hpte_modification(struct kvm *kvm,
struct revmap_entry *rev)
{
if (atomic_read(&kvm->arch.hpte_mod_interest))
rev->guest_rpte |= HPTE_GR_MODIFIED;
}
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#endif /* __ASM_KVM_BOOK3S_64_H__ */

View File

@ -20,6 +20,11 @@
#ifndef __ASM_KVM_BOOK3S_ASM_H__
#define __ASM_KVM_BOOK3S_ASM_H__
/* XICS ICP register offsets */
#define XICS_XIRR 4
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@ -81,10 +86,11 @@ struct kvmppc_host_state {
#ifdef CONFIG_KVM_BOOK3S_64_HV
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
unsigned long xics_phys;
u32 saved_xirr;
u64 dabr;
u64 host_mmcr[3];
u32 host_pmc[8];

View File

@ -44,6 +44,10 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
/* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 256
#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>
@ -188,6 +192,10 @@ struct kvmppc_linear_info {
int type;
};
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@ -255,6 +263,13 @@ struct kvm_arch {
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
#endif
#ifdef CONFIG_KVM_MPIC
struct openpic *mpic;
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
#endif
};
@ -301,11 +316,13 @@ struct kvmppc_vcore {
* that a guest can register.
*/
struct kvmppc_vpa {
unsigned long gpa; /* Current guest phys addr */
void *pinned_addr; /* Address in kernel linear mapping */
void *pinned_end; /* End of region */
unsigned long next_gpa; /* Guest phys addr for update */
unsigned long len; /* Number of bytes required */
u8 update_pending; /* 1 => update pinned_addr from next_gpa */
bool dirty; /* true => area has been modified by kernel */
};
struct kvmppc_pte {
@ -359,6 +376,11 @@ struct kvmppc_slb {
#define KVMPPC_BOOKE_MAX_IAC 4
#define KVMPPC_BOOKE_MAX_DAC 2
/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
#define KVMPPC_EPR_NONE 0 /* EPR not supported */
#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
struct kvmppc_booke_debug_reg {
u32 dbcr0;
u32 dbcr1;
@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg {
u64 dac[KVMPPC_BOOKE_MAX_DAC];
};
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2
struct openpic;
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@ -502,7 +530,9 @@ struct kvm_vcpu_arch {
spinlock_t wdt_lock;
struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 tlbps[4];
u32 mmucfg;
u32 eptcfg;
u32 epr;
u32 crit_save;
struct kvmppc_booke_debug_reg dbg_reg;
@ -522,7 +552,7 @@ struct kvm_vcpu_arch {
u8 sane;
u8 cpu_type;
u8 hcall_needed;
u8 epr_enabled;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@ -549,6 +579,13 @@ struct kvm_vcpu_arch {
unsigned long magic_page_pa; /* phys addr to map the magic page to */
unsigned long magic_page_ea; /* effect. addr to map the magic page to */
int irq_type; /* one of KVM_IRQ_* */
int irq_cpu_id;
struct openpic *mpic; /* KVM_IRQ_MPIC */
#ifdef CONFIG_KVM_XICS
struct kvmppc_icp *icp; /* XICS presentation controller */
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
@ -589,5 +626,6 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_FQPR 0x0060
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
#endif /* __POWERPC_KVM_HOST_H__ */

View File

@ -44,7 +44,7 @@ enum emulation_result {
EMULATE_DO_DCR, /* kvm_run filled with DCR request */
EMULATE_FAIL, /* can't emulate this instruction */
EMULATE_AGAIN, /* something went wrong. go again */
EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */
EMULATE_EXIT_USER, /* emulation requires exit to user-space */
};
extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@ -130,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@ -164,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@ -245,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
struct openpic;
#ifdef CONFIG_KVM_BOOK3S_64_HV
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
paca[cpu].kvm_hstate.xics_phys = addr;
}
static inline u32 kvmppc_get_xics_latch(void)
{
u32 xirr = get_paca()->kvm_hstate.saved_xirr;
get_paca()->kvm_hstate.saved_xirr = 0;
return xirr;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{
paca[cpu].kvm_hstate.host_ipi = host_ipi;
}
extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
extern void kvm_linear_init(void);
#else
@ -259,6 +289,44 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
static inline void kvm_linear_init(void)
{}
static inline u32 kvmppc_get_xics_latch(void)
{
return 0;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
{
kvm_vcpu_kick(vcpu);
}
#endif
#ifdef CONFIG_KVM_XICS
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
#else
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
unsigned long server)
{ return -EINVAL; }
static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
struct kvm_irq_level *args)
{ return -ENOTTY; }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
@ -270,6 +338,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
#endif
}
#ifdef CONFIG_KVM_MPIC
void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
u32 cpu);
void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
#else
static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
{
}
static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
return -EINVAL;
}
static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
struct kvm_vcpu *vcpu)
{
}
#endif /* CONFIG_KVM_MPIC */
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg);
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@ -282,8 +376,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
{
/* Clear i-cache for new pages */
struct page *page;
/*
* We can only access pages that the kernel maps
* as memory. Bail out for unmapped ones.
*/
if (!pfn_valid(pfn))
return;
/* Clear i-cache for new pages */
page = pfn_to_page(pfn);
if (!test_bit(PG_arch_1, &page->flags)) {
flush_dcache_icache_page(page);
@ -323,4 +424,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
return ea;
}
extern void xics_wake_cpu(int cpu);
#endif /* __POWERPC_KVM_PPC_H__ */

View File

@ -290,6 +290,7 @@
#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
#define LPCR_MER 0x00000800 /* Mediated External Exception */
#define LPCR_MER_SH 11
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */

View File

@ -25,6 +25,8 @@
/* Select powerpc specific features in <linux/kvm.h> */
#define __KVM_HAVE_SPAPR_TCE
#define __KVM_HAVE_PPC_SMT
#define __KVM_HAVE_IRQCHIP
#define __KVM_HAVE_IRQ_LINE
struct kvm_regs {
__u64 pc;
@ -272,8 +274,31 @@ struct kvm_debug_exit_arch {
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
struct {
/* H/W breakpoint/watchpoint address */
__u64 addr;
/*
* Type denotes h/w breakpoint, read watchpoint, write
* watchpoint or watchpoint (both read and write).
*/
#define KVMPPC_DEBUG_NONE 0x0
#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
__u32 type;
__u32 reserved;
} bp[16];
};
/* Debug related defines */
/*
* kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic
* and upper 16 bits are architecture specific. Architecture specific defines
* that ioctl is for setting hardware breakpoint or software breakpoint.
*/
#define KVM_GUESTDBG_USE_SW_BP 0x00010000
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
/* definition of registers in kvm_run */
struct kvm_sync_regs {
};
@ -299,6 +324,12 @@ struct kvm_allocate_rma {
__u64 rma_size;
};
/* for KVM_CAP_PPC_RTAS */
struct kvm_rtas_token_args {
char name[120];
__u64 token; /* Use a token of 0 to undefine a mapping */
};
struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
@ -359,6 +390,26 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */
#define KVM_REG_PPC_ICP_CPPR_MASK 0xff
#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */
#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff
#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */
#define KVM_REG_PPC_ICP_MFRR_MASK 0xff
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */
#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */
/* One-Reg API: PPC-specific registers */
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
@ -426,4 +477,26 @@ struct kvm_get_htab_header {
/* Debugging: Special instruction for software breakpoint */
#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
/* MMU registers */
#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
/*
* TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
* KVM_CAP_SW_TLB ioctl
*/
#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
#endif /* __LINUX_KVM_POWERPC_H */

View File

@ -477,6 +477,7 @@ int main(void)
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@ -573,6 +574,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);

View File

@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
return -EINVAL;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
return -EINVAL;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;

View File

@ -136,21 +136,41 @@ config KVM_E500V2
If unsure, say N.
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500 processors"
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
Support running unmodified E500MC/E5500 (32-bit) guest kernels in
virtual machines on E500MC/E5500 host processors.
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
Enable support for emulating MPIC devices inside the
host kernel, rather than relying on userspace to emulate.
Currently, support is limited to certain versions of
Freescale's MPIC implementation.
config KVM_XICS
bool "KVM in-kernel XICS emulation"
depends on KVM_BOOK3S_64 && !KVM_MPIC
---help---
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
IBM POWER (pSeries) servers.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION

View File

@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
book3s_hv_builtin.o
book3s_hv_builtin.o \
$(kvm-book3s_64-builtin-xics-objs-y)
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \
emulate.o \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@ -103,6 +110,9 @@ kvm-book3s_32-objs := \
book3s_32_mmu.o
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_440) += kvm.o

View File

@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
return prio;
}
static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
unsigned long old_pending = vcpu->arch.pending_exceptions;
@ -535,6 +535,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
&opcode, sizeof(u32));
break;
}
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@ -597,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
r = kvmppc_xics_set_icp(vcpu,
set_reg_val(reg->id, val));
break;
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;
@ -612,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;

View File

@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
rev[i].guest_rpte = ptel | rcbits;
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
}
}
unlock_rmap(rmapp);
hptep[0] &= ~HPTE_V_HVLOCK;
@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Now check and modify the HPTE */
if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
kvmppc_clear_ref_hpte(kvm, hptep, i);
rev[i].guest_rpte |= HPTE_R_R;
if (!(rev[i].guest_rpte & HPTE_R_R)) {
rev[i].guest_rpte |= HPTE_R_R;
note_hpte_modification(kvm, &rev[i]);
}
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
hptep[1] &= ~HPTE_R_C;
eieio();
hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
rev[i].guest_rpte |= HPTE_R_C;
if (!(rev[i].guest_rpte & HPTE_R_C)) {
rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]);
}
ret = 1;
}
hptep[0] &= ~HPTE_V_HVLOCK;
@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
return ret;
}
static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot,
unsigned long *map)
{
unsigned long gfn;
if (!vpa->dirty || !vpa->pinned_addr)
return;
gfn = vpa->gpa >> PAGE_SHIFT;
if (gfn < memslot->base_gfn ||
gfn >= memslot->base_gfn + memslot->npages)
return;
vpa->dirty = false;
if (map)
__set_bit_le(gfn - memslot->base_gfn, map);
}
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long *map)
{
unsigned long i;
unsigned long *rmapp;
struct kvm_vcpu *vcpu;
preempt_disable();
rmapp = memslot->arch.rmap;
@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
__set_bit_le(i, map);
++rmapp;
}
/* Harvest dirty bits from VPA and DTL updates */
/* Note: we never modify the SLB shadow buffer areas */
kvm_for_each_vcpu(i, vcpu, kvm) {
spin_lock(&vcpu->arch.vpa_update_lock);
harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
spin_unlock(&vcpu->arch.vpa_update_lock);
}
preempt_enable();
return 0;
}
@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page, *pages[1];
int npages;
unsigned long hva, psize, offset;
unsigned long hva, offset;
unsigned long pa;
unsigned long *physp;
int srcu_idx;
@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
psize = PAGE_SIZE;
if (PageHuge(page)) {
page = compound_head(page);
psize <<= compound_order(page);
}
offset = gpa & (psize - 1);
offset = gpa & (PAGE_SIZE - 1);
if (nb_ret)
*nb_ret = psize - offset;
*nb_ret = PAGE_SIZE - offset;
return page_address(page) + offset;
err:
@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
return NULL;
}
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
bool dirty)
{
struct page *page = virt_to_page(va);
struct kvm_memory_slot *memslot;
unsigned long gfn;
unsigned long *rmap;
int srcu_idx;
put_page(page);
if (!dirty || !kvm->arch.using_mmu_notifiers)
return;
/* We need to mark this page dirty in the rmap chain */
gfn = gpa >> PAGE_SHIFT;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (memslot) {
rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
lock_rmap(rmap);
*rmap |= KVMPPC_RMAP_CHANGED;
unlock_rmap(rmap);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
/*
@ -1193,16 +1245,36 @@ struct kvm_htab_ctx {
#define HPTE_SIZE (2 * sizeof(unsigned long))
/*
* Returns 1 if this HPT entry has been modified or has pending
* R/C bit changes.
*/
static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp)
{
unsigned long rcbits_unset;
if (revp->guest_rpte & HPTE_GR_MODIFIED)
return 1;
/* Also need to consider changes in reference and changed bits */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset))
return 1;
return 0;
}
static long record_hpte(unsigned long flags, unsigned long *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
unsigned long v, r;
unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
/* Unmodified entries are uninteresting except on the first pass */
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
dirty = hpte_dirty(revp, hptp);
if (!first_pass && !dirty)
return 0;
@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = hptp[0];
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
/* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
if (valid && (rcbits_unset & hptp[1])) {
revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) |
HPTE_GR_MODIFIED;
dirty = 1;
}
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
valid = 1;
}
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
valid = 0;
r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
r = revp->guest_rpte;
/* only clear modified if this is the right sort of entry */
if (valid == want_valid && dirty) {
r &= ~HPTE_GR_MODIFIED;
@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
while (i < kvm->arch.hpt_npte &&
!(revp->guest_rpte & HPTE_GR_MODIFIED)) {
!hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
++revp;

View File

@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->papr_hcall.args[i] = gpr;
}
emulated = EMULATE_DO_PAPR;
run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
emulated = EMULATE_EXIT_USER;
break;
}
#endif

View File

@ -66,6 +66,31 @@
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
{
int me;
int cpu = vcpu->cpu;
wait_queue_head_t *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
if (waitqueue_active(wqp)) {
wake_up_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}
me = get_cpu();
/* CPU points to the first thread of the core */
if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
int real_cpu = cpu + vcpu->arch.ptid;
if (paca[real_cpu].kvm_hstate.xics_phys)
xics_wake_cpu(real_cpu);
else if (cpu_online(cpu))
smp_send_reschedule(cpu);
}
put_cpu();
}
/*
* We use the vcpu_load/put functions to measure stolen time.
* Stolen time is counted as time when either the vcpu is able to
@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
len = ((struct reg_vpa *)va)->length.hword;
else
len = ((struct reg_vpa *)va)->length.word;
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, vpa, false);
/* Check length */
if (len > nb || len < sizeof(struct reg_vpa))
@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
va = NULL;
nb = 0;
if (gpa)
va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
va = kvmppc_pin_guest_page(kvm, gpa, &nb);
spin_lock(&vcpu->arch.vpa_update_lock);
if (gpa == vpap->next_gpa)
break;
/* sigh... unpin that one and try again */
if (va)
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, gpa, false);
}
vpap->update_pending = 0;
@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
* has changed the mappings underlying guest memory,
* so unregister the region.
*/
kvmppc_unpin_guest_page(kvm, va);
kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
if (vpap->pinned_addr)
kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
vpap->dirty);
vpap->gpa = gpa;
vpap->pinned_addr = va;
vpap->dirty = false;
if (va)
vpap->pinned_end = va + vpap->len;
}
@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = ++vcpu->arch.dtl_index;
vcpu->arch.dtl.dirty = true;
}
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
struct kvm_vcpu *tvcpu;
int idx;
int idx, rc;
switch (req) {
case H_ENTER:
@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
rc = kvmppc_rtas_hcall(vcpu);
if (rc == -ENOENT)
return RESUME_HOST;
else if (rc == 0)
break;
/* Send the error out to userspace via KVM_RUN */
return rc;
case H_XIRR:
case H_CPPR:
case H_EOI:
case H_IPI:
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
} /* fallthrough */
default:
return RESUME_HOST;
}
@ -913,15 +964,19 @@ out:
return ERR_PTR(err);
}
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
vpa->dirty);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.dtl.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
if (vcpu->arch.slb_shadow.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
if (vcpu->arch.vpa.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
vc->runner = vcpu;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
}
if (n_ceded == vc->n_runnable)
kvmppc_vcore_blocked(vc);
else
@ -1821,6 +1878,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
cpumask_setall(&kvm->arch.need_tlb_flush);
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
kvm->arch.rma = NULL;
@ -1866,6 +1924,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvm->arch.rma = NULL;
}
kvmppc_rtas_tokens_free(kvm);
kvmppc_free_hpt(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

View File

@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
/*
* Note modification of an HPTE; set the HPTE modified bit
* if anyone is interested.
*/
static inline void note_hpte_modification(struct kvm *kvm,
struct revmap_entry *rev)
{
if (atomic_read(&kvm->arch.hpte_mod_interest))
rev->guest_rpte |= HPTE_GR_MODIFIED;
}
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,

View File

@ -0,0 +1,406 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
static inline void rm_writeb(unsigned long paddr, u8 val)
{
__asm__ __volatile__("sync; stbcix %0,0,%1"
: : "r" (val), "r" (paddr) : "memory");
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
unsigned long xics_phys;
int cpu;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) {
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
return;
}
/* Check if the core is loaded, if not, too hard */
cpu = vcpu->cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
/* In SMT cpu will always point to thread 0, we adjust it */
cpu += vcpu->arch.ptid;
/* Not too hard, then poke the target */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
}
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{
/* Note: Only called on self ! */
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
}
static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
union kvmppc_icp_state old,
union kvmppc_icp_state new)
{
struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
bool success;
/* Calculate new output value */
new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
/* Attempt atomic update */
success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
if (!success)
goto bail;
/*
* Check for output state update
*
* Note that this is racy since another processor could be updating
* the state already. This is why we never clear the interrupt output
* here, we only ever set it. The clear only happens prior to doing
* an update and only by the processor itself. Currently we do it
* in Accept (H_XIRR) and Up_Cppr (H_XPPR).
*
* We also do not try to figure out whether the EE state has changed,
* we unconditionally set it if the new state calls for it. The reason
* for that is that we opportunistically remove the pending interrupt
* flag when raising CPPR, so we need to set it back here if an
* interrupt is still pending.
*/
if (new.out_ee)
icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
/* Expose the state change for debug purposes */
this_vcpu->arch.icp->rm_dbgstate = new;
this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
bail:
return success;
}
static inline int check_too_hard(struct kvmppc_xics *xics,
struct kvmppc_icp *icp)
{
return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
}
static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u8 new_cppr)
{
union kvmppc_icp_state old_state, new_state;
bool resend;
/*
* This handles several related states in one operation:
*
* ICP State: Down_CPPR
*
* Load CPPR with new value and if the XISR is 0
* then check for resends:
*
* ICP State: Resend
*
* If MFRR is more favored than CPPR, check for IPIs
* and notify ICS of a potential resend. This is done
* asynchronously (when used in real mode, we will have
* to exit here).
*
* We do not handle the complete Check_IPI as documented
* here. In the PAPR, this state will be used for both
* Set_MFRR and Down_CPPR. However, we know that we aren't
* changing the MFRR state here so we don't need to handle
* the case of an MFRR causing a reject of a pending irq,
* this will have been handled when the MFRR was set in the
* first place.
*
* Thus we don't have to handle rejects, only resends.
*
* When implementing real mode for HV KVM, resend will lead to
* a H_TOO_HARD return and the whole transaction will be handled
* in virtual mode.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
/* Down_CPPR */
new_state.cppr = new_cppr;
/*
* Cut down Resend / Check_IPI / IPI
*
* The logic is that we cannot have a pending interrupt
* trumped by an IPI at this point (see above), so we
* know that either the pending interrupt is already an
* IPI (in which case we don't care to override it) or
* it's either more favored than us or non existent
*/
if (new_state.mfrr < new_cppr &&
new_state.mfrr <= new_state.pending_pri) {
new_state.pending_pri = new_state.mfrr;
new_state.xisr = XICS_IPI;
}
/* Latch/clear resend bit */
resend = new_state.need_resend;
new_state.need_resend = 0;
} while (!icp_rm_try_update(icp, old_state, new_state));
/*
* Now handle resend checks. Those are asynchronous to the ICP
* state update in HW (ie bus transactions) so we can handle them
* separately here as well.
*/
if (resend)
icp->rm_action |= XICS_RM_CHECK_RESEND;
}
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 xirr;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/* First clear the interrupt */
icp_rm_clr_vcpu_irq(icp->vcpu);
/*
* ICP State: Accept_Interrupt
*
* Return the pending interrupt (if any) along with the
* current CPPR, then clear the XISR & set CPPR to the
* pending priority
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
if (!old_state.xisr)
break;
new_state.cppr = new_state.pending_pri;
new_state.pending_pri = 0xff;
new_state.xisr = 0;
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Return the result in GPR4 */
vcpu->arch.gpr[4] = xirr;
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
u32 reject;
bool resend;
bool local;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
local = this_icp->server_num == server;
if (local)
icp = this_icp;
else
icp = kvmppc_xics_find_server(vcpu->kvm, server);
if (!icp)
return H_PARAMETER;
/*
* ICP state: Set_MFRR
*
* If the CPPR is more favored than the new MFRR, then
* nothing needs to be done as there can be no XISR to
* reject.
*
* If the CPPR is less favored, then we might be replacing
* an interrupt, and thus need to possibly reject it as in
*
* ICP state: Check_IPI
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
/* Set_MFRR */
new_state.mfrr = mfrr;
/* Check_IPI */
reject = 0;
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
if (mfrr <= new_state.pending_pri)
reject = new_state.xisr;
new_state.pending_pri = mfrr;
new_state.xisr = XICS_IPI;
}
if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
if (reject && reject != XICS_IPI) {
this_icp->rm_action |= XICS_RM_REJECT;
this_icp->rm_reject = reject;
}
/* Pass resends to virtual mode */
if (resend)
this_icp->rm_action |= XICS_RM_CHECK_RESEND;
return check_too_hard(xics, this_icp);
}
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
u32 reject;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/*
* ICP State: Set_CPPR
*
* We can safely compare the new value with the current
* value outside of the transaction as the CPPR is only
* ever changed by the processor on itself
*/
if (cppr > icp->state.cppr) {
icp_rm_down_cppr(xics, icp, cppr);
goto bail;
} else if (cppr == icp->state.cppr)
return H_SUCCESS;
/*
* ICP State: Up_CPPR
*
* The processor is raising its priority, this can result
* in a rejection of a pending interrupt:
*
* ICP State: Reject_Current
*
* We can remove EE from the current processor, the update
* transaction will set it again if needed
*/
icp_rm_clr_vcpu_irq(icp->vcpu);
do {
old_state = new_state = ACCESS_ONCE(icp->state);
reject = 0;
new_state.cppr = cppr;
if (cppr <= new_state.pending_pri) {
reject = new_state.xisr;
new_state.xisr = 0;
new_state.pending_pri = 0xff;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
if (reject && reject != XICS_IPI) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = reject;
}
bail:
return check_too_hard(xics, icp);
}
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u32 irq = xirr & 0x00ffffff;
u16 src;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
/*
* ICP State: EOI
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
* a pending interrupt, this is a SW error and PAPR sepcifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
* CPPR update
*
* ICP State: Down_CPPR which we handle
* in a separate function as it's shared with H_CPPR.
*/
icp_rm_down_cppr(xics, icp, xirr >> 24);
/* IPIs have no EOI */
if (irq == XICS_IPI)
goto bail;
/*
* EOI handling: If the interrupt is still asserted, we need to
* resend it. We can take a lockless "peek" at the ICS state here.
*
* "Message" interrupts will never have "asserted" set
*/
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics)
goto bail;
state = &ics->irq_state[src];
/* Still asserted, resend it, we make it look like a reject */
if (state->asserted) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = irq;
}
bail:
return check_too_hard(xics, icp);
}

View File

@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
* *
*****************************************************************************/
#define XICS_XIRR 4
#define XICS_QIRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
/*
* We come in here when wakened from nap mode on a secondary hw thread.
* Relocation is off and most register values are lost.
@ -101,50 +97,51 @@ kvm_start_guest:
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi cr1,r4,0
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
cmpwi r0,0
bne kvm_end_cede
/*
* We weren't napping due to cede, so this must be a secondary
* thread being woken up to run a guest, or being woken up due
* to a stray IPI. (Or due to some machine check or hypervisor
* maintenance interrupt while the core is in KVM.)
*/
/* Check the wake reason in SRR1 to see why we got here */
mfspr r3,SPRN_SRR1
rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
cmpwi r3,4 /* was it an external interrupt? */
bne 27f
/*
* External interrupt - for now assume it is an IPI, since we
* should never get any other interrupts sent to offline threads.
* Only do this for secondary threads.
*/
beq cr1,25f
lwz r3,VCPU_PTID(r4)
cmpwi r3,0
beq 27f
25: ld r5,HSTATE_XICS_PHYS(r13)
li r0,0xff
li r6,XICS_QIRR
li r7,XICS_XIRR
bne 27f /* if not */
ld r5,HSTATE_XICS_PHYS(r13)
li r7,XICS_XIRR /* if it was an external interrupt, */
lwzcix r8,r5,r7 /* get and ack the interrupt */
sync
clrldi. r9,r8,40 /* get interrupt source ID. */
beq 27f /* none there? */
cmpwi r9,XICS_IPI
bne 26f
beq 28f /* none there? */
cmpwi r9,XICS_IPI /* was it an IPI? */
bne 29f
li r0,0xff
li r6,XICS_MFRR
stbcix r0,r5,r6 /* clear IPI */
26: stwcix r8,r5,r7 /* EOI the interrupt */
stwcix r8,r5,r7 /* EOI the interrupt */
sync /* order loading of vcpu after that */
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
/* reload vcpu pointer after clearing the IPI */
/* get vcpu pointer, NULL if we have no vcpu to run */
ld r4,HSTATE_KVM_VCPU(r13)
cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
beq kvm_no_guest
b kvmppc_hv_entry
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
cmpwi r0,0
bne kvm_end_cede
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
b kvm_no_guest
28: /* SRR1 said external but ICP said nope?? */
b kvm_no_guest
29: /* External non-IPI interrupt to offline secondary thread? help?? */
stw r8,HSTATE_SAVED_XIRR(r13)
b kvm_no_guest
.global kvmppc_hv_entry
kvmppc_hv_entry:
@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r5, LPPACA_YIELDCOUNT(r3)
addi r5, r5, 1
stw r5, LPPACA_YIELDCOUNT(r3)
li r6, 1
stb r6, VCPU_VPA_DIRTY(r4)
25:
/* Load up DAR and DSISR */
ld r5, VCPU_DAR(r4)
@ -485,20 +484,20 @@ toc_tlbie_lock:
mtctr r6
mtxer r7
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
/* Check if we can deliver an external or decrementer interrupt now */
ld r0,VCPU_PENDING_EXC(r4)
li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and r0,r0,r8
cmpdi cr1,r0,0
andi. r0,r11,MSR_EE
@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Move SRR0 and SRR1 into the respective regs */
5: mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
li r0,0
stb r0,VCPU_CEDED(r4) /* cancel cede */
fast_guest_return:
li r0,0
stb r0,VCPU_CEDED(r4) /* cancel cede */
mtspr SPRN_HSRR0,r10
mtspr SPRN_HSRR1,r11
@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
/* Check for mediated interrupts (could be done earlier really ...) */
/* Only handle external interrupts here on arch 206 and later */
BEGIN_FTR_SECTION
cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
bne+ 1f
andi. r0,r11,MSR_EE
beq 1f
mfspr r5,SPRN_LPCR
andi. r0,r5,LPCR_MER
bne bounce_ext_interrupt
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
b ext_interrupt_to_host
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ ext_interrupt_to_host
/* External interrupt, first check for host_ipi. If this is
* set, we know the host wants us out so let's do it now
*/
do_ext_interrupt:
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne ext_interrupt_to_host
/* Now read the interrupt from the ICP */
ld r5, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
cmpdi r5, 0
beq- ext_interrupt_to_host
lwzcix r3, r5, r7
rlwinm. r0, r3, 0, 0xffffff
sync
beq 3f /* if nothing pending in the ICP */
/* We found something in the ICP...
*
* If it's not an IPI, stash it in the PACA and return to
* the host, we don't (yet) handle directing real external
* interrupts directly to the guest
*/
cmpwi r0, XICS_IPI
bne ext_stash_for_host
/* It's an IPI, clear the MFRR and EOI it */
li r0, 0xff
li r6, XICS_MFRR
stbcix r0, r5, r6 /* clear the IPI */
stwcix r3, r5, r7 /* EOI it */
sync
/* We need to re-check host IPI now in case it got set in the
* meantime. If it's clear, we bounce the interrupt to the
* guest
*/
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bne- 1f
/* Allright, looks like an IPI for the guest, we need to set MER */
3:
/* Check if any CPU is heading out to the host, if so head out too */
ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
bge ext_interrupt_to_host
/* See if there is a pending interrupt for the guest */
mfspr r8, SPRN_LPCR
ld r0, VCPU_PENDING_EXC(r9)
/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
beq 2f
/* And if the guest EE is set, we can deliver immediately, else
* we return to the guest with MER set
*/
andi. r0, r11, MSR_EE
beq 2f
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_EXTERNAL
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
2: mr r4, r9
mtspr SPRN_LPCR, r8
b fast_guest_return
/* We raced with the host, we need to resend that IPI, bummer */
1: li r0, IPI_PRIORITY
stbcix r0, r5, r6 /* set the IPI */
sync
b ext_interrupt_to_host
ext_stash_for_host:
/* It's not an IPI and it's for the host, stash it in the PACA
* before exit, it will be picked up by the host ICP driver
*/
stw r3, HSTATE_SAVED_XIRR(r13)
ext_interrupt_to_host:
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
beq 44f
ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
li r0,IPI_PRIORITY
li r7,XICS_QIRR
li r7,XICS_MFRR
stbcix r0,r7,r8 /* trigger the IPI */
44: srdi. r3,r3,1
addi r6,r6,PACA_SIZE
@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
lwz r3, LPPACA_YIELDCOUNT(r8)
addi r3, r3, 1
stw r3, LPPACA_YIELDCOUNT(r8)
li r3, 1
stb r3, VCPU_VPA_DIRTY(r9)
25:
/* Save PMU registers if requested */
/* r8 and cr0.eq are live here */
@ -1350,11 +1433,19 @@ hcall_real_table:
.long 0 /* 0x58 */
.long 0 /* 0x5c */
.long 0 /* 0x60 */
.long 0 /* 0x64 */
.long 0 /* 0x68 */
.long 0 /* 0x6c */
.long 0 /* 0x70 */
.long 0 /* 0x74 */
#ifdef CONFIG_KVM_XICS
.long .kvmppc_rm_h_eoi - hcall_real_table
.long .kvmppc_rm_h_cppr - hcall_real_table
.long .kvmppc_rm_h_ipi - hcall_real_table
.long 0 /* 0x70 - H_IPOLL */
.long .kvmppc_rm_h_xirr - hcall_real_table
#else
.long 0 /* 0x64 - H_EOI */
.long 0 /* 0x68 - H_CPPR */
.long 0 /* 0x6c - H_IPI */
.long 0 /* 0x70 - H_IPOLL */
.long 0 /* 0x74 - H_XIRR */
#endif
.long 0 /* 0x78 */
.long 0 /* 0x7c */
.long 0 /* 0x80 */
@ -1405,15 +1496,6 @@ ignore_hdec:
mr r4,r9
b fast_guest_return
bounce_ext_interrupt:
mr r4,r9
mtspr SPRN_SRR0,r10
mtspr SPRN_SRR1,r11
li r10,BOOK3S_INTERRUPT_EXTERNAL
li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11,r11,63
b fast_guest_return
_GLOBAL(kvmppc_h_set_dabr)
std r4,VCPU_DABR(r3)
/* Work around P7 bug where DABR can get corrupted on mtspr */
@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
b .
kvm_end_cede:
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
/* Woken by external or decrementer interrupt */
ld r1, HSTATE_HOST_R1(r13)
@ -1558,6 +1643,16 @@ kvm_end_cede:
li r0,0
stb r0,HSTATE_NAPPING(r13)
/* Check the wake reason in SRR1 to see why we got here */
mfspr r3, SPRN_SRR1
rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */
cmpwi r3, 4 /* was it an external interrupt? */
li r12, BOOK3S_INTERRUPT_EXTERNAL
mr r9, r4
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
beq do_ext_interrupt /* if so */
/* see if any other thread is already exiting */
lwz r0,VCORE_ENTRY_EXIT(r5)
cmpwi r0,0x100
@ -1577,8 +1672,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
li r3,H_TOO_HARD
blr
b hcall_real_fallback
/* Try to handle a machine check in real mode */
machine_check_realmode:
@ -1626,7 +1720,7 @@ secondary_nap:
beq 37f
sync
li r0, 0xff
li r6, XICS_QIRR
li r6, XICS_MFRR
stbcix r0, r5, r6 /* clear the IPI */
stwcix r3, r5, r7 /* EOI it */
37: sync

View File

@ -762,9 +762,7 @@ program_interrupt:
run->exit_reason = KVM_EXIT_MMIO;
r = RESUME_HOST_NV;
break;
case EMULATE_DO_PAPR:
run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
case EMULATE_EXIT_USER:
r = RESUME_HOST_NV;
break;
default:
@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {

View File

@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
case H_XIRR:
case H_CPPR:
case H_EOI:
case H_IPI:
if (kvmppc_xics_enabled(vcpu))
return kvmppc_h_pr_xics_hcall(vcpu, cmd);
break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
if (kvmppc_rtas_hcall(vcpu))
break;
kvmppc_set_gpr(vcpu, 3, 0);
return EMULATE_DONE;
}
return EMULATE_FAIL;

View File

@ -0,0 +1,274 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/err.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/rtas.h>
#ifdef CONFIG_KVM_XICS
static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq, server, priority;
int rc;
if (args->nargs != 3 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
server = args->args[1];
priority = args->args[2];
rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq, server, priority;
int rc;
if (args->nargs != 1 || args->nret != 3) {
rc = -3;
goto out;
}
irq = args->args[0];
server = priority = 0;
rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
if (rc) {
rc = -3;
goto out;
}
args->rets[1] = server;
args->rets[2] = priority;
out:
args->rets[0] = rc;
}
static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq;
int rc;
if (args->nargs != 1 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
{
u32 irq;
int rc;
if (args->nargs != 1 || args->nret != 1) {
rc = -3;
goto out;
}
irq = args->args[0];
rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
args->rets[0] = rc;
}
#endif /* CONFIG_KVM_XICS */
struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
};
static struct rtas_handler rtas_handlers[] = {
#ifdef CONFIG_KVM_XICS
{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
{ .name = "ibm,int-off", .handler = kvm_rtas_int_off },
{ .name = "ibm,int-on", .handler = kvm_rtas_int_on },
#endif
};
struct rtas_token_definition {
struct list_head list;
struct rtas_handler *handler;
u64 token;
};
static int rtas_name_matches(char *s1, char *s2)
{
struct kvm_rtas_token_args args;
return !strncmp(s1, s2, sizeof(args.name));
}
static int rtas_token_undefine(struct kvm *kvm, char *name)
{
struct rtas_token_definition *d, *tmp;
lockdep_assert_held(&kvm->lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
if (rtas_name_matches(d->handler->name, name)) {
list_del(&d->list);
kfree(d);
return 0;
}
}
/* It's not an error to undefine an undefined token */
return 0;
}
static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
{
struct rtas_token_definition *d;
struct rtas_handler *h = NULL;
bool found;
int i;
lockdep_assert_held(&kvm->lock);
list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
if (d->token == token)
return -EEXIST;
}
found = false;
for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
h = &rtas_handlers[i];
if (rtas_name_matches(h->name, name)) {
found = true;
break;
}
}
if (!found)
return -ENOENT;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->handler = h;
d->token = token;
list_add_tail(&d->list, &kvm->arch.rtas_tokens);
return 0;
}
int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
{
struct kvm_rtas_token_args args;
int rc;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
mutex_lock(&kvm->lock);
if (args.token)
rc = rtas_token_define(kvm, args.name, args.token);
else
rc = rtas_token_undefine(kvm, args.name);
mutex_unlock(&kvm->lock);
return rc;
}
int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
{
struct rtas_token_definition *d;
struct rtas_args args;
rtas_arg_t *orig_rets;
gpa_t args_phys;
int rc;
/* r4 contains the guest physical address of the RTAS args */
args_phys = kvmppc_get_gpr(vcpu, 4);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
if (rc)
goto fail;
/*
* args->rets is a pointer into args->args. Now that we've
* copied args we need to fix it up to point into our copy,
* not the guest args. We also need to save the original
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
args.rets = &args.args[args.nargs];
mutex_lock(&vcpu->kvm->lock);
rc = -ENOENT;
list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
if (d->token == args.token) {
d->handler->handler(vcpu, &args);
rc = 0;
break;
}
}
mutex_unlock(&vcpu->kvm->lock);
if (rc == 0) {
args.rets = orig_rets;
rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
if (rc)
goto fail;
}
return rc;
fail:
/*
* We only get here if the guest has called RTAS with a bogus
* args pointer. That means we can't get to the args, and so we
* can't fail the RTAS call. So fail right out to userspace,
* which should kill the guest.
*/
return rc;
}
void kvmppc_rtas_tokens_free(struct kvm *kvm)
{
struct rtas_token_definition *d, *tmp;
lockdep_assert_held(&kvm->lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
list_del(&d->list);
kfree(d);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,129 @@
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*/
#ifndef _KVM_PPC_BOOK3S_XICS_H
#define _KVM_PPC_BOOK3S_XICS_H
/*
* We use a two-level tree to store interrupt source information.
* There are up to 1024 ICS nodes, each of which can represent
* 1024 sources.
*/
#define KVMPPC_XICS_MAX_ICS_ID 1023
#define KVMPPC_XICS_ICS_SHIFT 10
#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT)
#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1)
/*
* Interrupt source numbers below this are reserved, for example
* 0 is "no interrupt", and 2 is used for IPIs.
*/
#define KVMPPC_XICS_FIRST_IRQ 16
#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \
KVMPPC_XICS_IRQ_PER_ICS)
/* Priority value to use for disabling an interrupt */
#define MASKED 0xff
/* State for one irq source */
struct ics_irq_state {
u32 number;
u32 server;
u8 priority;
u8 saved_priority;
u8 resend;
u8 masked_pending;
u8 asserted; /* Only for LSI */
u8 exists;
};
/* Atomic ICP state, updated with a single compare & swap */
union kvmppc_icp_state {
unsigned long raw;
struct {
u8 out_ee:1;
u8 need_resend:1;
u8 cppr;
u8 mfrr;
u8 pending_pri;
u32 xisr;
};
};
/* One bit per ICS */
#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
struct kvmppc_icp {
struct kvm_vcpu *vcpu;
unsigned long server_num;
union kvmppc_icp_state state;
unsigned long resend_map[ICP_RESEND_MAP_SIZE];
/* Real mode might find something too hard, here's the action
* it might request from virtual mode
*/
#define XICS_RM_KICK_VCPU 0x1
#define XICS_RM_CHECK_RESEND 0x2
#define XICS_RM_REJECT 0x4
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
u32 rm_reject;
/* Debug stuff for real mode */
union kvmppc_icp_state rm_dbgstate;
struct kvm_vcpu *rm_dbgtgt;
};
struct kvmppc_ics {
struct mutex lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
struct kvmppc_xics {
struct kvm *kvm;
struct dentry *dentry;
u32 max_icsid;
bool real_mode;
bool real_mode_dbg;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
};
static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
int i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
return vcpu->arch.icp;
}
return NULL;
}
static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
u32 irq, u16 *source)
{
u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
u16 src = irq & KVMPPC_XICS_SRC_MASK;
struct kvmppc_ics *ics;
if (source)
*source = src;
if (icsid > KVMPPC_XICS_MAX_ICS_ID)
return NULL;
ics = xics->ics[icsid];
if (!ics)
return NULL;
return ics;
}
#endif /* _KVM_PPC_BOOK3S_XICS_H */

View File

@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
keep_irq = true;
}
if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
update_epr = true;
switch (priority) {
@ -427,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
if (update_epr == true)
kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
if (update_epr == true) {
if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
kvmppc_mpic_set_epr(vcpu);
}
}
new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
@ -745,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
case EMULATE_EXIT_USER:
return RESUME_HOST;
default:
BUG();
}
@ -1412,120 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = -EINVAL;
int r = 0;
union kvmppc_one_reg val;
int size;
long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
return -EINVAL;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
case KVM_REG_PPC_IAC4:
i = reg->id - KVM_REG_PPC_IAC1;
val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_to_user((u64 __user *)(long)reg->addr,
&vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
case KVM_REG_PPC_DAC2:
i = reg->id - KVM_REG_PPC_DAC1;
val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
break;
}
case KVM_REG_PPC_EPR: {
u32 epr = get_guest_epr(vcpu);
r = put_user(epr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, vcpu->arch.epcr);
break;
#endif
case KVM_REG_PPC_TCR:
r = put_user(vcpu->arch.tcr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, vcpu->arch.tcr);
break;
case KVM_REG_PPC_TSR:
r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr);
val = get_reg_val(reg->id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST: {
u32 opcode = KVMPPC_INST_EHPRIV;
r = copy_to_user((u32 __user *)(long)reg->addr,
&opcode, sizeof(u32));
case KVM_REG_PPC_DEBUG_INST:
val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
break;
}
default:
r = kvmppc_get_one_reg(vcpu, reg->id, &val);
break;
}
if (r)
return r;
if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
r = -EFAULT;
return r;
}
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = -EINVAL;
int r = 0;
union kvmppc_one_reg val;
int size;
long int i;
size = one_reg_size(reg->id);
if (size > sizeof(val))
return -EINVAL;
if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
return -EFAULT;
switch (reg->id) {
case KVM_REG_PPC_IAC1:
case KVM_REG_PPC_IAC2:
case KVM_REG_PPC_IAC3:
case KVM_REG_PPC_IAC4: {
int iac = reg->id - KVM_REG_PPC_IAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
(u64 __user *)(long)reg->addr, sizeof(u64));
case KVM_REG_PPC_IAC4:
i = reg->id - KVM_REG_PPC_IAC1;
vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
break;
}
case KVM_REG_PPC_DAC1:
case KVM_REG_PPC_DAC2: {
int dac = reg->id - KVM_REG_PPC_DAC1;
r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
(u64 __user *)(long)reg->addr, sizeof(u64));
case KVM_REG_PPC_DAC2:
i = reg->id - KVM_REG_PPC_DAC1;
vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
break;
}
case KVM_REG_PPC_EPR: {
u32 new_epr;
r = get_user(new_epr, (u32 __user *)(long)reg->addr);
if (!r)
kvmppc_set_epr(vcpu, new_epr);
u32 new_epr = set_reg_val(reg->id, val);
kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
u32 new_epcr;
r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
if (r == 0)
kvmppc_set_epcr(vcpu, new_epcr);
u32 new_epcr = set_reg_val(reg->id, val);
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
case KVM_REG_PPC_OR_TSR: {
u32 tsr_bits;
r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
u32 tsr_bits = set_reg_val(reg->id, val);
kvmppc_set_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_CLEAR_TSR: {
u32 tsr_bits;
r = get_user(tsr_bits, (u32 __user *)(long)reg->addr);
u32 tsr_bits = set_reg_val(reg->id, val);
kvmppc_clr_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_TSR: {
u32 tsr;
r = get_user(tsr, (u32 __user *)(long)reg->addr);
u32 tsr = set_reg_val(reg->id, val);
kvmppc_set_tsr(vcpu, tsr);
break;
}
case KVM_REG_PPC_TCR: {
u32 tcr;
r = get_user(tcr, (u32 __user *)(long)reg->addr);
u32 tcr = set_reg_val(reg->id, val);
kvmppc_set_tcr(vcpu, tcr);
break;
}
default:
r = kvmppc_set_one_reg(vcpu, reg->id, &val);
break;
}
return r;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;

View File

@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;

View File

@ -23,6 +23,10 @@
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
enum vcpu_ftr {
VCPU_FTR_MMU_V2
};
#define E500_PID_NUM 3
#define E500_TLB_NUM 2
@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val);
int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val);
#ifdef CONFIG_KVM_E500V2
unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
#define get_tlb_sts(gtlbe) (MAS1_TS)
#endif /* !BOOKE_HV */
static inline bool has_feature(const struct kvm_vcpu *vcpu,
enum vcpu_ftr ftr)
{
bool has_ftr;
switch (ftr) {
case VCPU_FTR_MMU_V2:
has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
break;
default:
return false;
}
return has_ftr;
}
#endif /* KVM_E500_H */

View File

@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_TLB1CFG:
*spr_val = vcpu->arch.tlbcfg[1];
break;
case SPRN_TLB0PS:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
*spr_val = vcpu->arch.tlbps[0];
break;
case SPRN_TLB1PS:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
*spr_val = vcpu->arch.tlbps[1];
break;
case SPRN_L1CSR0:
*spr_val = vcpu_e500->l1csr0;
break;
@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_MMUCFG:
*spr_val = vcpu->arch.mmucfg;
break;
case SPRN_EPTCFG:
if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
return EMULATE_FAIL;
/*
* Legacy Linux guests access EPTCFG register even if the E.PT
* category is disabled in the VM. Give them a chance to live.
*/
*spr_val = vcpu->arch.eptcfg;
break;
/* extra exceptions */
case SPRN_IVOR32:

View File

@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = 0;
long int i;
switch (id) {
case KVM_REG_PPC_MAS0:
*val = get_reg_val(id, vcpu->arch.shared->mas0);
break;
case KVM_REG_PPC_MAS1:
*val = get_reg_val(id, vcpu->arch.shared->mas1);
break;
case KVM_REG_PPC_MAS2:
*val = get_reg_val(id, vcpu->arch.shared->mas2);
break;
case KVM_REG_PPC_MAS7_3:
*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
break;
case KVM_REG_PPC_MAS4:
*val = get_reg_val(id, vcpu->arch.shared->mas4);
break;
case KVM_REG_PPC_MAS6:
*val = get_reg_val(id, vcpu->arch.shared->mas6);
break;
case KVM_REG_PPC_MMUCFG:
*val = get_reg_val(id, vcpu->arch.mmucfg);
break;
case KVM_REG_PPC_EPTCFG:
*val = get_reg_val(id, vcpu->arch.eptcfg);
break;
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
case KVM_REG_PPC_TLB3CFG:
i = id - KVM_REG_PPC_TLB0CFG;
*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
break;
case KVM_REG_PPC_TLB0PS:
case KVM_REG_PPC_TLB1PS:
case KVM_REG_PPC_TLB2PS:
case KVM_REG_PPC_TLB3PS:
i = id - KVM_REG_PPC_TLB0PS;
*val = get_reg_val(id, vcpu->arch.tlbps[i]);
break;
default:
r = -EINVAL;
break;
}
return r;
}
int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = 0;
long int i;
switch (id) {
case KVM_REG_PPC_MAS0:
vcpu->arch.shared->mas0 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS1:
vcpu->arch.shared->mas1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS2:
vcpu->arch.shared->mas2 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS7_3:
vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS4:
vcpu->arch.shared->mas4 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MAS6:
vcpu->arch.shared->mas6 = set_reg_val(id, *val);
break;
/* Only allow MMU registers to be set to the config supported by KVM */
case KVM_REG_PPC_MMUCFG: {
u32 reg = set_reg_val(id, *val);
if (reg != vcpu->arch.mmucfg)
r = -EINVAL;
break;
}
case KVM_REG_PPC_EPTCFG: {
u32 reg = set_reg_val(id, *val);
if (reg != vcpu->arch.eptcfg)
r = -EINVAL;
break;
}
case KVM_REG_PPC_TLB0CFG:
case KVM_REG_PPC_TLB1CFG:
case KVM_REG_PPC_TLB2CFG:
case KVM_REG_PPC_TLB3CFG: {
/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
u32 reg = set_reg_val(id, *val);
i = id - KVM_REG_PPC_TLB0CFG;
if (reg != vcpu->arch.tlbcfg[i])
r = -EINVAL;
break;
}
case KVM_REG_PPC_TLB0PS:
case KVM_REG_PPC_TLB1PS:
case KVM_REG_PPC_TLB2PS:
case KVM_REG_PPC_TLB3PS: {
u32 reg = set_reg_val(id, *val);
i = id - KVM_REG_PPC_TLB0PS;
if (reg != vcpu->arch.tlbps[i])
r = -EINVAL;
break;
}
default:
r = -EINVAL;
break;
}
return r;
}
static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_params *params)
{
vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params->tlb_sizes[0] <= 2048)
vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];
vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
return 0;
}
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
struct kvm_config_tlb *cfg)
{
@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
if (params.tlb_sizes[0] <= 2048)
vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0];
vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1];
vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
/* Update vcpu's MMU geometry based on SW_TLB input */
vcpu_mmu_geometry_update(vcpu, &params);
vcpu_e500->shared_tlb_pages = pages;
vcpu_e500->num_shared_tlb_pages = num_pages;
@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
return 0;
}
/* Vcpu's MMU default configuration */
static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
struct kvmppc_e500_tlb_params *params)
{
/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/
vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
/* Initialize TLBnCFG fields with host values and SW_TLB geometry*/
vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[0] |= params[0].entries;
vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= params[1].entries;
vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);
vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
vcpu->arch.mmucfg &= ~MMUCFG_LRAT;
/* Guest mmu emulation currently doesn't handle E.PT */
vcpu->arch.eptcfg = 0;
vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
}
return 0;
}
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->g2h_tlb1_map)
goto err;
/* Init TLB configuration register */
vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries;
vcpu->arch.tlbcfg[0] |=
vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries;
vcpu->arch.tlbcfg[1] |=
vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;

View File

@ -172,6 +172,8 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
r = 0;
else
r = -ENOTSUPP;
@ -255,6 +257,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
return r;
}
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
union kvmppc_one_reg *val)
{
int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
return r;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;

17
arch/powerpc/kvm/irq.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef __IRQ_H
#define __IRQ_H
#include <linux/kvm_host.h>
static inline int irqchip_in_kernel(struct kvm *kvm)
{
int ret = 0;
#ifdef CONFIG_KVM_MPIC
ret = ret || (kvm->arch.mpic != NULL);
#endif
smp_rmb();
return ret;
}
#endif

1843
arch/powerpc/kvm/mpic.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,7 @@
#include <linux/hrtimer.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@ -32,6 +33,7 @@
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@ -325,6 +328,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB:
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
#endif
r = 1;
break;
@ -335,6 +341,7 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@ -459,6 +466,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
switch (vcpu->arch.irq_type) {
case KVMPPC_IRQ_MPIC:
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
break;
case KVMPPC_IRQ_XICS:
kvmppc_xics_free_icp(vcpu);
break;
}
kvmppc_core_vcpu_free(vcpu);
}
@ -531,12 +548,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@ -768,7 +779,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
case KVM_CAP_PPC_EPR:
r = 0;
vcpu->arch.epr_enabled = cap->args[0];
if (cap->args[0])
vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
else
vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
break;
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_WATCHDOG:
@ -788,6 +802,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
break;
}
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC: {
struct file *filp;
struct kvm_device *dev;
r = -EBADF;
filp = fget(cap->args[0]);
if (!filp)
break;
r = -EPERM;
dev = kvm_device_from_filp(filp);
if (dev)
r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
fput(filp);
break;
}
#endif
default:
r = -EINVAL;
@ -911,9 +944,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
return 0;
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
if (!irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event->irq, irq_event->level,
line_status);
return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
long r;
@ -932,7 +978,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm *kvm = filp->private_data;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@ -944,8 +989,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_KVM_BOOK3S_64_HV
case KVM_ALLOCATE_RMA: {
struct kvm *kvm = filp->private_data;
struct kvm_allocate_rma rma;
struct kvm *kvm = filp->private_data;
r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@ -954,7 +999,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_ALLOCATE_HTAB: {
struct kvm *kvm = filp->private_data;
u32 htab_order;
r = -EFAULT;
@ -971,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
case KVM_PPC_GET_HTAB_FD: {
struct kvm *kvm = filp->private_data;
struct kvm_get_htab_fd ghf;
r = -EFAULT;
@ -984,7 +1027,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
struct kvm *kvm = filp->private_data;
struct kvm_ppc_smmu_info info;
memset(&info, 0, sizeof(info));
@ -993,6 +1035,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
case KVM_PPC_RTAS_DEFINE_TOKEN: {
struct kvm *kvm = filp->private_data;
r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
break;
}
#endif /* CONFIG_PPC_BOOK3S_64 */
default:
r = -ENOTTY;

View File

@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
static inline unsigned int icp_native_get_xirr(void)
{
int cpu = smp_processor_id();
unsigned int xirr;
/* Handled an interrupt latched by KVM */
xirr = kvmppc_get_xics_latch();
if (xirr)
return xirr;
return in_be32(&icp_native_regs[cpu]->xirr.word);
}
@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu, unsigned long data)
{
kvmppc_set_host_ipi(cpu, 1);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();

View File

@ -43,6 +43,8 @@
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \

View File

@ -29,6 +29,7 @@ config KVM
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select KVM_ASYNC_PF

View File

@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o \
assigned-dev.o)
assigned-dev.o irqchip.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)

View File

@ -2522,7 +2522,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_IRQFD_RESAMPLE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:

View File

@ -303,10 +303,10 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
struct kvm_irq_routing_table {
int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
struct kvm_kernel_irq_routing_entry *rt_entries;
u32 nr_rt_entries;
/*
@ -392,6 +392,7 @@ struct kvm {
long mmu_notifier_count;
#endif
long tlbs_dirty;
struct list_head devices;
};
#define kvm_err(fmt, ...) \
@ -431,7 +432,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
@ -718,11 +719,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
#ifdef __KVM_HAVE_IOAPIC
void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
union kvm_ioapic_redirect_entry *entry,
unsigned long *deliver_bitmask);
#endif
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@ -956,7 +952,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
}
#endif
#ifdef KVM_CAP_IRQ_ROUTING
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
#define KVM_MAX_IRQ_ROUTES 1024
@ -965,6 +961,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_free_irq_routing(struct kvm *kvm);
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@ -1065,6 +1064,43 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
extern bool kvm_rebooting;
struct kvm_device_ops;
struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
void *private;
struct list_head vm_node;
};
/* create, destroy, and name are mandatory */
struct kvm_device_ops {
const char *name;
int (*create)(struct kvm_device *dev, u32 type);
/*
* Destroy is responsible for freeing dev.
*
* Destroy may be called before or after destructors are called
* on emulated I/O regions, depending on whether a reference is
* held by a vcpu or other kvm component that gets destroyed
* after the emulated I/O.
*/
void (*destroy)(struct kvm_device *dev);
int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
unsigned long arg);
};
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
extern struct kvm_device_ops kvm_mpic_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)

View File

@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
__entry->errno < 0 ? -__entry->errno : __entry->reason)
);
#if defined(__KVM_HAVE_IRQ_LINE)
#if defined(CONFIG_HAVE_KVM_IRQCHIP)
TRACE_EVENT(kvm_set_irq,
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
TP_ARGS(gsi, level, irq_source_id),
@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq,
{KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \
{KVM_IRQCHIP_IOAPIC, "IOAPIC"}
#endif /* defined(__KVM_HAVE_IOAPIC) */
#if defined(CONFIG_HAVE_KVM_IRQCHIP)
TRACE_EVENT(kvm_ack_irq,
TP_PROTO(unsigned int irqchip, unsigned int pin),
TP_ARGS(irqchip, pin),
@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq,
__entry->pin = pin;
),
#ifdef kvm_irqchips
TP_printk("irqchip %s pin %u",
__print_symbolic(__entry->irqchip, kvm_irqchips),
__entry->pin)
#else
TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin)
#endif
);
#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
#endif /* defined(__KVM_HAVE_IOAPIC) */
#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
#define KVM_TRACE_MMIO_READ 1

View File

@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info {
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_REINJECT_CONTROL 24
#endif
#ifdef __KVM_HAVE_IOAPIC
#define KVM_CAP_IRQ_ROUTING 25
#endif
#define KVM_CAP_IRQ_INJECT_STATUS 26
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_DEASSIGNMENT 27
@ -668,6 +666,9 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_EPR 86
#define KVM_CAP_ARM_PSCI 87
#define KVM_CAP_ARM_SET_DEVICE_ADDR 88
#define KVM_CAP_DEVICE_CTRL 89
#define KVM_CAP_IRQ_MPIC 90
#define KVM_CAP_PPC_RTAS 91
#ifdef KVM_CAP_IRQ_ROUTING
@ -820,6 +821,27 @@ struct kvm_arm_device_addr {
__u64 addr;
};
/*
* Device control API, available with KVM_CAP_DEVICE_CTRL
*/
#define KVM_CREATE_DEVICE_TEST 1
struct kvm_create_device {
__u32 type; /* in: KVM_DEV_TYPE_xxx */
__u32 fd; /* out: device handle */
__u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
};
struct kvm_device_attr {
__u32 flags; /* no flags currently defined */
__u32 group; /* device-defined */
__u64 attr; /* group-defined */
__u64 addr; /* userspace address of attr data */
};
#define KVM_DEV_TYPE_FSL_MPIC_20 1
#define KVM_DEV_TYPE_FSL_MPIC_42 2
/*
* ioctls for VM fds
*/
@ -907,6 +929,16 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
/* ioctls for fds returned by KVM_CREATE_DEVICE */
#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr)
#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr)
#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr)
/*
* ioctls for vcpu fds

View File

@ -6,6 +6,9 @@ config HAVE_KVM
config HAVE_KVM_IRQCHIP
bool
config HAVE_KVM_IRQ_ROUTING
bool
config HAVE_KVM_EVENTFD
bool
select EVENTFD

View File

@ -983,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
goto out;
break;
}
#ifdef KVM_CAP_IRQ_ROUTING
case KVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
struct kvm_irq_routing __user *urouting;
struct kvm_irq_routing_entry *entries;
r = -EFAULT;
if (copy_from_user(&routing, argp, sizeof(routing)))
goto out;
r = -EINVAL;
if (routing.nr >= KVM_MAX_IRQ_ROUTES)
goto out;
if (routing.flags)
goto out;
r = -ENOMEM;
entries = vmalloc(routing.nr * sizeof(*entries));
if (!entries)
goto out;
r = -EFAULT;
urouting = argp;
if (copy_from_user(entries, urouting->entries,
routing.nr * sizeof(*entries)))
goto out_free_irq_routing;
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
out_free_irq_routing:
vfree(entries);
break;
}
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
case KVM_ASSIGN_SET_MSIX_NR: {
struct kvm_assigned_msix_nr entry_nr;

View File

@ -35,7 +35,7 @@
#include "iodev.h"
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
@ -433,7 +433,7 @@ fail:
void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@ -442,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->ioeventfds);
}
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
/*
* shutdown any irqfd's that match fd+gsi
*/

View File

@ -151,59 +151,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
return -EWOULDBLOCK;
}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
if (!irqchip_in_kernel(kvm) || msi->flags != 0)
return -EINVAL;
route.msi.address_lo = msi->address_lo;
route.msi.address_hi = msi->address_hi;
route.msi.data = msi->data;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
int ret = -1, i = 0;
struct kvm_irq_routing_table *irq_rt;
trace_kvm_set_irq(irq, level, irq_source_id);
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
rcu_read_lock();
irq_rt = rcu_dereference(kvm->irq_routing);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, &irq_rt->map[irq], link)
irq_set[i++] = *e;
rcu_read_unlock();
while(i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
line_status);
if (r < 0)
continue;
ret = r + ((ret < 0) ? 0 : ret);
}
return ret;
}
/*
* Deliver an IRQ in an atomic context if we can, or return a failure,
* user can retry in a process context.
@ -241,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
return ret;
}
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
int gsi;
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi) {
rcu_read_unlock();
return true;
}
rcu_read_unlock();
return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
int gsi;
trace_kvm_ack_irq(irqchip, pin);
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
kvm_vcpu_request_scan_ioapic(kvm);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
kvm_vcpu_request_scan_ioapic(kvm);
}
int kvm_request_irq_source_id(struct kvm *kvm)
{
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@ -381,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
rcu_read_unlock();
}
void kvm_free_irq_routing(struct kvm *kvm)
{
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
kfree(kvm->irq_routing);
}
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
int delta;
unsigned max_pin;
struct kvm_kernel_irq_routing_entry *ei;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and MSI.
*/
hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
if (ei->type == KVM_IRQ_ROUTING_MSI ||
ue->type == KVM_IRQ_ROUTING_MSI ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
e->gsi = ue->gsi;
e->type = ue->type;
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
delta = 0;
@ -445,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
goto out;
}
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
out:
return r;
}
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
unsigned nr,
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
u32 i, j, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
return -EINVAL;
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
}
nr_rt_entries += 1;
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
GFP_KERNEL);
if (!new)
return -ENOMEM;
new->rt_entries = (void *)&new->map[nr_rt_entries];
new->nr_rt_entries = nr_rt_entries;
for (i = 0; i < 3; i++)
for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
r = -EINVAL;
if (ue->flags)
goto out;
r = setup_routing_entry(new, &new->rt_entries[i], ue);
if (r)
goto out;
++ue;
}
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
kvm_irq_routing_update(kvm, new);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
new = old;
r = 0;
out:
kfree(new);
return r;
}
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
.u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }

237
virt/kvm/irqchip.c Normal file
View File

@ -0,0 +1,237 @@
/*
* irqchip.c: Common API for in kernel interrupt controllers
* Copyright (c) 2007, Intel Corporation.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2013, Alexander Graf <agraf@suse.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* This file is derived from virt/kvm/irq_comm.c.
*
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
* Alexander Graf <agraf@suse.de>
*/
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
int gsi;
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi) {
rcu_read_unlock();
return true;
}
rcu_read_unlock();
return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
int gsi;
trace_kvm_ack_irq(irqchip, pin);
rcu_read_lock();
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
#ifdef __KVM_HAVE_IOAPIC
kvm_vcpu_request_scan_ioapic(kvm);
#endif
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
#ifdef __KVM_HAVE_IOAPIC
kvm_vcpu_request_scan_ioapic(kvm);
#endif
}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
if (!irqchip_in_kernel(kvm) || msi->flags != 0)
return -EINVAL;
route.msi.address_lo = msi->address_lo;
route.msi.address_hi = msi->address_hi;
route.msi.data = msi->data;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
int ret = -1, i = 0;
struct kvm_irq_routing_table *irq_rt;
trace_kvm_set_irq(irq, level, irq_source_id);
/* Not possible to detect if the guest uses the PIC or the
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
rcu_read_lock();
irq_rt = rcu_dereference(kvm->irq_routing);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, &irq_rt->map[irq], link)
irq_set[i++] = *e;
rcu_read_unlock();
while(i--) {
int r;
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
line_status);
if (r < 0)
continue;
ret = r + ((ret < 0) ? 0 : ret);
}
return ret;
}
void kvm_free_irq_routing(struct kvm *kvm)
{
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
kfree(kvm->irq_routing);
}
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
struct kvm_kernel_irq_routing_entry *ei;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and MSI.
*/
hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
if (ei->type == KVM_IRQ_ROUTING_MSI ||
ue->type == KVM_IRQ_ROUTING_MSI ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
e->gsi = ue->gsi;
e->type = ue->type;
r = kvm_set_routing_entry(rt, e, ue);
if (r)
goto out;
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
out:
return r;
}
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
unsigned nr,
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
u32 i, j, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
return -EINVAL;
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
}
nr_rt_entries += 1;
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
GFP_KERNEL);
if (!new)
return -ENOMEM;
new->rt_entries = (void *)&new->map[nr_rt_entries];
new->nr_rt_entries = nr_rt_entries;
for (i = 0; i < KVM_NR_IRQCHIPS; i++)
for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
r = -EINVAL;
if (ue->flags)
goto out;
r = setup_routing_entry(new, &new->rt_entries[i], ue);
if (r)
goto out;
++ue;
}
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
kvm_irq_routing_update(kvm, new);
mutex_unlock(&kvm->irq_lock);
synchronize_rcu();
new = old;
r = 0;
out:
kfree(new);
return r;
}

View File

@ -504,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
r = kvm_init_mmu_notifier(kvm);
if (r)
@ -581,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm)
kfree(kvm->memslots);
}
static void kvm_destroy_devices(struct kvm *kvm)
{
struct list_head *node, *tmp;
list_for_each_safe(node, tmp, &kvm->devices) {
struct kvm_device *dev =
list_entry(node, struct kvm_device, vm_node);
list_del(node);
dev->ops->destroy(dev);
}
}
static void kvm_destroy_vm(struct kvm *kvm)
{
int i;
@ -600,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_flush_shadow_all(kvm);
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
@ -2159,6 +2174,111 @@ out:
}
#endif
static int kvm_device_ioctl_attr(struct kvm_device *dev,
int (*accessor)(struct kvm_device *dev,
struct kvm_device_attr *attr),
unsigned long arg)
{
struct kvm_device_attr attr;
if (!accessor)
return -EPERM;
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
return -EFAULT;
return accessor(dev, &attr);
}
static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
struct kvm_device *dev = filp->private_data;
switch (ioctl) {
case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
case KVM_GET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
case KVM_HAS_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
default:
if (dev->ops->ioctl)
return dev->ops->ioctl(dev, ioctl, arg);
return -ENOTTY;
}
}
static int kvm_device_release(struct inode *inode, struct file *filp)
{
struct kvm_device *dev = filp->private_data;
struct kvm *kvm = dev->kvm;
kvm_put_kvm(kvm);
return 0;
}
static const struct file_operations kvm_device_fops = {
.unlocked_ioctl = kvm_device_ioctl,
.release = kvm_device_release,
};
struct kvm_device *kvm_device_from_filp(struct file *filp)
{
if (filp->f_op != &kvm_device_fops)
return NULL;
return filp->private_data;
}
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int ret;
switch (cd->type) {
#ifdef CONFIG_KVM_MPIC
case KVM_DEV_TYPE_FSL_MPIC_20:
case KVM_DEV_TYPE_FSL_MPIC_42:
ops = &kvm_mpic_ops;
break;
#endif
default:
return -ENODEV;
}
if (test)
return 0;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
dev->ops = ops;
dev->kvm = kvm;
ret = ops->create(dev, cd->type);
if (ret < 0) {
kfree(dev);
return ret;
}
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
if (ret < 0) {
ops->destroy(dev);
return ret;
}
list_add(&dev->vm_node, &kvm->devices);
kvm_get_kvm(kvm);
cd->fd = ret;
return 0;
}
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -2274,6 +2394,54 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
struct kvm_irq_routing __user *urouting;
struct kvm_irq_routing_entry *entries;
r = -EFAULT;
if (copy_from_user(&routing, argp, sizeof(routing)))
goto out;
r = -EINVAL;
if (routing.nr >= KVM_MAX_IRQ_ROUTES)
goto out;
if (routing.flags)
goto out;
r = -ENOMEM;
entries = vmalloc(routing.nr * sizeof(*entries));
if (!entries)
goto out;
r = -EFAULT;
urouting = argp;
if (copy_from_user(entries, urouting->entries,
routing.nr * sizeof(*entries)))
goto out_free_irq_routing;
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
out_free_irq_routing:
vfree(entries);
break;
}
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
case KVM_CREATE_DEVICE: {
struct kvm_create_device cd;
r = -EFAULT;
if (copy_from_user(&cd, argp, sizeof(cd)))
goto out;
r = kvm_ioctl_create_device(kvm, &cd);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &cd, sizeof(cd)))
goto out;
r = 0;
break;
}
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@ -2402,9 +2570,12 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
case KVM_CAP_INTERNAL_ERROR_DATA:
#ifdef CONFIG_HAVE_KVM_MSI
case KVM_CAP_SIGNAL_MSI:
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQFD_RESAMPLE:
#endif
return 1;
#ifdef KVM_CAP_IRQ_ROUTING
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
#endif