KVM: PPC: Book3S: Add API for in-kernel XICS emulation

This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

This does not support irq routing or irqfd yet.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
This commit is contained in:
Paul Mackerras 2013-04-27 00:28:37 +00:00 committed by Alexander Graf
parent d133b40f2c
commit 5975a2e095
11 changed files with 289 additions and 27 deletions

View File

@ -2772,3 +2772,11 @@ Parameters: args[0] is the MPIC device fd
args[1] is the MPIC CPU number for this vcpu
This capability connects the vcpu to an in-kernel MPIC device.
6.7 KVM_CAP_IRQ_XICS
Architectures: ppc
Parameters: args[0] is the XICS device fd
args[1] is the XICS CPU number (server ID) for this vcpu
This capability connects the vcpu to an in-kernel XICS device.

View File

@ -0,0 +1,66 @@
XICS interrupt controller
Device type supported: KVM_DEV_TYPE_XICS
Groups:
KVM_DEV_XICS_GRP_SOURCES
Attributes: One per interrupt source, indexed by the source number.
This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt
sources, each identified by a 20-bit source number, and a set of
Interrupt Control Presentation (ICP) entities, also called "servers",
each associated with a virtual CPU.
The ICP entities are created by enabling the KVM_CAP_IRQ_XICS
capability for each vcpu, specifying the XICS device fd in args[0] and
the interrupt server number (i.e. the vcpu number from the XICS's
point of view) in args[1] of the kvm_enable_cap struct. Each ICP has
64 bits of state which can be read and written using the
KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit
state word has the following bitfields, starting at the
least-significant end of the word:
* Unused, 16 bits
* Pending interrupt priority, 8 bits
Zero is the highest priority, 255 means no interrupt is pending.
* Pending IPI (inter-processor interrupt) priority, 8 bits
Zero is the highest priority, 255 means no IPI is pending.
* Pending interrupt source number, 24 bits
Zero means no interrupt pending, 2 means an IPI is pending
* Current processor priority, 8 bits
Zero is the highest priority, meaning no interrupts can be
delivered, and 255 is the lowest priority.
Each source has 64 bits of state that can be read and written using
the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being
the interrupt source number. The 64 bit state word has the following
bitfields, starting from the least-significant end of the word:
* Destination (server number), 32 bits
This specifies where the interrupt should be sent, and is the
interrupt server number specified for the destination vcpu.
* Priority, 8 bits
This is the priority specified for this interrupt source, where 0 is
the highest priority and 255 is the lowest. An interrupt with a
priority of 255 will never be delivered.
* Level sensitive flag, 1 bit
This bit is 1 for a level-sensitive interrupt source, or 0 for
edge-sensitive (or MSI).
* Masked flag, 1 bit
This bit is set to 1 if the interrupt is masked (cannot be delivered
regardless of its priority), for example by the ibm,int-off RTAS
call, or 0 if it is not masked.
* Pending flag, 1 bit
This bit is 1 if the source has a pending interrupt, otherwise 0.
Only one XICS instance may be created per VM.

View File

@ -315,6 +315,8 @@ extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
#else
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }

View File

@ -499,4 +499,16 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
/* PPC64 eXternal Interrupt Controller Specification */
/*
 * Attribute group of the XICS device.  Each attribute in the group is the
 * state of one interrupt source; the attribute number is the source number.
 */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
/* Layout of 64-bit source attribute values */
/* Destination (interrupt server number): the low 32 bits of the word */
#define KVM_XICS_DESTINATION_SHIFT 0
#define KVM_XICS_DESTINATION_MASK 0xffffffffULL
/*
 * Priority: 8 bits starting at bit 32.
 * NOTE(review): the mask is presumably meant for the already-shifted value.
 */
#define KVM_XICS_PRIORITY_SHIFT 32
#define KVM_XICS_PRIORITY_MASK 0xff
/* Single-bit flags located directly above the priority field */
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
#endif /* __LINUX_KVM_POWERPC_H */

View File

@ -11,6 +11,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/anon_inodes.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
@ -55,8 +56,6 @@
*
* - Make ICS lockless as well, or at least a per-interrupt lock or hashed
* locks array to improve scalability
*
* - ioctl's to save/restore the entire state for snapshot & migration
*/
/* -- ICS routines -- */
@ -64,7 +63,8 @@
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
bool report_status)
{
struct ics_irq_state *state;
struct kvmppc_ics *ics;
@ -81,6 +81,9 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
if (!state->exists)
return -EINVAL;
if (report_status)
return state->asserted;
/*
* We set state->asserted locklessly. This should be fine as
* we are the only setter, thus concurrent access is undefined
@ -96,7 +99,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
/* Attempt delivery */
icp_deliver_irq(xics, NULL, irq);
return 0;
return state->asserted;
}
static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@ -891,8 +894,8 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
kfree(name);
}
struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
struct kvmppc_xics *xics, int irq)
static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
struct kvmppc_xics *xics, int irq)
{
struct kvmppc_ics *ics;
int i, icsid;
@ -1044,34 +1047,138 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
return 0;
}
/* -- ioctls -- */
int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
struct kvmppc_xics *xics;
int r;
int ret;
struct kvmppc_ics *ics;
struct ics_irq_state *irqp;
u64 __user *ubufp = (u64 __user *) addr;
u16 idx;
u64 val, prio;
/* locking against multiple callers? */
ics = kvmppc_xics_find_ics(xics, irq, &idx);
if (!ics)
return -ENOENT;
xics = kvm->arch.xics;
if (!xics)
return -ENODEV;
switch (args->level) {
case KVM_INTERRUPT_SET:
case KVM_INTERRUPT_SET_LEVEL:
case KVM_INTERRUPT_UNSET:
r = ics_deliver_irq(xics, args->irq, args->level);
break;
default:
r = -EINVAL;
irqp = &ics->irq_state[idx];
mutex_lock(&ics->lock);
ret = -ENOENT;
if (irqp->exists) {
val = irqp->server;
prio = irqp->priority;
if (prio == MASKED) {
val |= KVM_XICS_MASKED;
prio = irqp->saved_priority;
}
val |= prio << KVM_XICS_PRIORITY_SHIFT;
if (irqp->asserted)
val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
else if (irqp->masked_pending || irqp->resend)
val |= KVM_XICS_PENDING;
ret = 0;
}
mutex_unlock(&ics->lock);
return r;
if (!ret && put_user(val, ubufp))
ret = -EFAULT;
return ret;
}
void kvmppc_xics_free(struct kvmppc_xics *xics)
/*
 * Load the state of one interrupt source from a 64-bit word in userspace.
 *
 * @xics: the XICS instance owning the source
 * @irq:  interrupt source number (the device attribute number)
 * @addr: userspace address of the 64-bit state word to read
 *
 * The ICS covering @irq is created on demand if it does not exist yet.
 * The word is decoded using the KVM_XICS_* layout; any previously
 * recorded resend/masked_pending/asserted state is discarded and then
 * re-derived from the PENDING and LEVEL_SENSITIVE flags.  If the source
 * is flagged pending, delivery is attempted after the ICS lock has been
 * dropped.
 *
 * Returns 0 on success, -ENOENT if @irq is outside the valid range,
 * -ENOMEM if the ICS cannot be created, -EFAULT if the word cannot be
 * read, or -EINVAL if the priority is not MASKED and the destination
 * server does not exist.
 */
static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	u64 __user *uptr = (u64 __user *) addr;
	struct ics_irq_state *src;
	struct kvmppc_ics *ics;
	u64 word;
	u32 dest;
	u16 slot;
	u8 pri;

	if (!(irq >= KVMPPC_XICS_FIRST_IRQ && irq < KVMPPC_XICS_NR_IRQS))
		return -ENOENT;

	ics = kvmppc_xics_find_ics(xics, irq, &slot);
	if (ics == NULL) {
		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
		if (ics == NULL)
			return -ENOMEM;
	}
	src = &ics->irq_state[slot];

	if (get_user(word, uptr))
		return -EFAULT;

	/* Split the state word into destination and priority. */
	dest = (word >> KVM_XICS_DESTINATION_SHIFT) & KVM_XICS_DESTINATION_MASK;
	pri = (word >> KVM_XICS_PRIORITY_SHIFT) & KVM_XICS_PRIORITY_MASK;

	/* A deliverable priority requires an existing destination server. */
	if (pri != MASKED && !kvmppc_xics_find_server(xics->kvm, dest))
		return -EINVAL;

	mutex_lock(&ics->lock);
	src->server = dest;
	/* Keep the real priority around even while the source is masked. */
	src->saved_priority = pri;
	src->priority = (word & KVM_XICS_MASKED) ? MASKED : pri;
	src->resend = 0;
	src->masked_pending = 0;
	src->asserted = 0;
	/* Only a pending level-sensitive source counts as asserted. */
	if ((word & KVM_XICS_LEVEL_SENSITIVE) && (word & KVM_XICS_PENDING))
		src->asserted = 1;
	src->exists = 1;
	mutex_unlock(&ics->lock);

	if (word & KVM_XICS_PENDING)
		icp_deliver_irq(xics, NULL, src->number);

	return 0;
}
/*
 * Assert or deassert an interrupt input of the VM's in-kernel XICS.
 *
 * @kvm:           the VM whose XICS is targeted
 * @irq_source_id: not used by this implementation
 * @irq:           interrupt source number
 * @level:         requested line level, passed through to ics_deliver_irq()
 * @line_status:   passed to ics_deliver_irq() as its report_status argument
 *
 * Returns -ENODEV if no XICS has been instantiated for this VM, otherwise
 * whatever ics_deliver_irq() returns.
 */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvmppc_xics *xics = kvm->arch.xics;

	/*
	 * kvm->arch.xics stays NULL until an XICS device is created; never
	 * hand a NULL controller down to the delivery path.
	 */
	if (!xics)
		return -ENODEV;

	return ics_deliver_irq(xics, irq, level, line_status);
}
/*
 * Device set_attr callback: write one XICS attribute.
 *
 * Only the KVM_DEV_XICS_GRP_SOURCES group is handled, where the attribute
 * number is the interrupt source and attr->addr is the userspace address
 * of the 64-bit state word.  Any other group yields -ENXIO.
 */
static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *ctrl = dev->private;

	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
		return -ENXIO;

	return xics_set_source(ctrl, attr->attr, attr->addr);
}
/*
 * Device get_attr callback: read one XICS attribute.
 *
 * Only the KVM_DEV_XICS_GRP_SOURCES group is handled, where the attribute
 * number is the interrupt source and attr->addr is the userspace address
 * that receives the 64-bit state word.  Any other group yields -ENXIO.
 */
static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *ctrl = dev->private;

	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
		return -ENXIO;

	return xics_get_source(ctrl, attr->attr, attr->addr);
}
/*
 * Device has_attr callback: report whether an attribute is supported.
 *
 * Returns 0 for an attribute in the KVM_DEV_XICS_GRP_SOURCES group whose
 * number lies in [KVMPPC_XICS_FIRST_IRQ, KVMPPC_XICS_NR_IRQS), and -ENXIO
 * for everything else.
 */
static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	if (attr->group == KVM_DEV_XICS_GRP_SOURCES &&
	    attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
	    attr->attr < KVMPPC_XICS_NR_IRQS)
		return 0;

	return -ENXIO;
}
static void kvmppc_xics_free(struct kvm_device *dev)
{
struct kvmppc_xics *xics = dev->private;
int i;
struct kvm *kvm = xics->kvm;
@ -1083,17 +1190,21 @@ void kvmppc_xics_free(struct kvmppc_xics *xics)
for (i = 0; i <= xics->max_icsid; i++)
kfree(xics->ics[i]);
kfree(xics);
kfree(dev);
}
int kvm_xics_create(struct kvm *kvm, u32 type)
static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xics *xics;
struct kvm *kvm = dev->kvm;
int ret = 0;
xics = kzalloc(sizeof(*xics), GFP_KERNEL);
if (!xics)
return -ENOMEM;
dev->private = xics;
xics->dev = dev;
xics->kvm = kvm;
/* Already there ? */
@ -1120,6 +1231,35 @@ int kvm_xics_create(struct kvm *kvm, u32 type)
return 0;
}
/*
 * Device operations for the in-kernel XICS, selected by
 * KVM_CREATE_DEVICE for KVM_DEV_TYPE_XICS.
 */
struct kvm_device_ops kvm_xics_ops = {
	.name = "kvm-xics",

	/* lifetime */
	.create = kvmppc_xics_create,
	.destroy = kvmppc_xics_free,

	/* attribute access */
	.has_attr = xics_has_attr,
	.get_attr = xics_get_attr,
	.set_attr = xics_set_attr,
};
/*
 * Attach a vcpu to an in-kernel XICS device.
 *
 * @dev:  device obtained from the fd passed with KVM_CAP_IRQ_XICS
 * @vcpu: vcpu to connect
 * @xcpu: XICS CPU number (server ID) for this vcpu
 *
 * Returns -EPERM if @dev is not an XICS device or belongs to a different
 * VM than @vcpu, -EBUSY if the vcpu already has an interrupt controller
 * type set, otherwise the result of kvmppc_xics_create_icp().  On success
 * the vcpu's irq_type becomes KVMPPC_IRQ_XICS.
 */
int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
			     u32 xcpu)
{
	struct kvmppc_xics *xics = dev->private;
	int err;

	/* The ops check comes first so xics is only trusted for XICS devices. */
	if (dev->ops != &kvm_xics_ops || xics->kvm != vcpu->kvm)
		return -EPERM;

	if (vcpu->arch.irq_type)
		return -EBUSY;

	err = kvmppc_xics_create_icp(vcpu, xcpu);
	if (err)
		return err;

	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
	return 0;
}
void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.icp)

View File

@ -88,6 +88,7 @@ struct kvmppc_ics {
struct kvmppc_xics {
struct kvm *kvm;
struct kvm_device *dev;
struct dentry *dentry;
u32 max_icsid;
bool real_mode;

View File

@ -9,6 +9,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
#ifdef CONFIG_KVM_MPIC
ret = ret || (kvm->arch.mpic != NULL);
#endif
#ifdef CONFIG_KVM_XICS
ret = ret || (kvm->arch.xics != NULL);
#endif
smp_rmb();
return ret;

View File

@ -342,6 +342,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@ -837,6 +840,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS: {
struct file *filp;
struct kvm_device *dev;
r = -EBADF;
filp = fget(cap->args[0]);
if (!filp)
break;
r = -EPERM;
dev = kvm_device_from_filp(filp);
if (dev)
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
fput(filp);
break;
}
#endif /* CONFIG_KVM_XICS */
default:
r = -EINVAL;
break;

View File

@ -1086,6 +1086,7 @@ void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

View File

@ -665,6 +665,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_DEVICE_CTRL 89
#define KVM_CAP_IRQ_MPIC 90
#define KVM_CAP_PPC_RTAS 91
#define KVM_CAP_IRQ_XICS 92
#ifdef KVM_CAP_IRQ_ROUTING
@ -837,6 +838,7 @@ struct kvm_device_attr {
#define KVM_DEV_TYPE_FSL_MPIC_20 1
#define KVM_DEV_TYPE_FSL_MPIC_42 2
#define KVM_DEV_TYPE_XICS 3
/*
* ioctls for VM fds

View File

@ -2246,6 +2246,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
case KVM_DEV_TYPE_FSL_MPIC_42:
ops = &kvm_mpic_ops;
break;
#endif
#ifdef CONFIG_KVM_XICS
case KVM_DEV_TYPE_XICS:
ops = &kvm_xics_ops;
break;
#endif
default:
return -ENODEV;