[vfio-users] [RFC PATCH] KVM: Add module for platform IRQ forwarding

Micah Morton mortonm at chromium.org
Thu Feb 20 18:47:36 UTC 2020


Currently, KVM/VFIO offers no way to forward interrupts into a guest VM
when those interrupts are generated by a platform device that sits
behind a PCI bus controller that has been passed through to the VM. This
module allows arbitrary host interrupts to be forwarded into the guest,
supporting this platform-device-behind-PCI-controller scenario.

This code is mostly inspired by/taken from the equivalent code in VFIO.
I don't necessarily expect this exact code, or code closely resembling
it, to be merged -- I'm just sharing a patch I've written and tested to
show an example of the kind of functionality I'm looking to gain support
for in KVM. (At the very least, some of the code duplicated from VFIO
should probably be eliminated.)

One obvious question would be why not just add this support to VFIO?
See https://www.redhat.com/archives/vfio-users/2019-December/msg00008.html
and the encompassing thread for a discussion as to why this probably
isn't the way to go.

Forwarding arbitrary IRQs to a guest VM does require the VMM to "tell"
the guest about the interrupt (e.g. through ACPI), since such info is
not discoverable by the guest like it is for PCI devices. So separate
logic is needed in the VMM to set this up -- this isn't something done
by the module shared here.

What do people think? Forwarding platform IRQs can have a big payoff for
getting platform devices to work in a guest, especially when those
devices sit behind a PCI bus controller that can easily be passed
through to the guest. On an Intel device I'm using for development, this
module let me get multiple devices (keyboard, touchscreen, touchpad)
working in a guest VM where they otherwise wouldn't have -- straight out
of the box after passing the PCI bus controller through with vfio-pci
(plus constructing some AML for the guest in the VMM).

NOTE: This code works for forwarding IRQs to a guest (with the VMM
calling the appropriate ioctls with the appropriate args), although it
is missing some code and testing around shutdown, IRQ disable, and
reboot. It works well enough to demonstrate feasibility, though.
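
To make the expected flow concrete, here is a rough sketch of how a VMM
might drive this from userspace for an edge-triggered interrupt. This is
not part of the patch: forward_edge_irq(), HOST_IRQ_NUM and GUEST_GSI
are made-up placeholders (the real host IRQ number comes from the host
side, e.g. its ACPI tables, and the guest GSI is whatever the VMM
advertises to the guest in the AML it constructs), error handling is
omitted, and it assumes the new uapi header is usable from userspace:

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/eventfd.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>
  #include <linux/platirqforward.h>

  #define HOST_IRQ_NUM 70   /* hypothetical host IRQ number */
  #define GUEST_GSI    10   /* hypothetical GSI described to the guest */

  static int forward_edge_irq(int vm_fd)
  {
          size_t sz = sizeof(struct plat_irq_forward_set) + sizeof(int32_t);
          struct plat_irq_forward_set *set = calloc(1, sz);
          struct kvm_irqfd irqfd = { 0 };
          int32_t trigger_fd;
          int fwd_fd;

          /* Eventfd the module signals each time the host IRQ fires. */
          trigger_fd = eventfd(0, EFD_CLOEXEC);
          fwd_fd = open("/dev/plat-irq-forward", O_RDWR | O_CLOEXEC);

          /* Ask the module to request the host IRQ and tie it to the eventfd. */
          set->argsz = sz;
          set->action_flags = PLAT_IRQ_FORWARD_SET_EDGE_TRIGGER;
          set->irq_number_host = HOST_IRQ_NUM;
          set->count = 1;
          memcpy(set->eventfd, &trigger_fd, sizeof(trigger_fd));
          if (ioctl(fwd_fd, PLAT_IRQ_FORWARD_SET, set))
                  return -1;

          /* Hand the same eventfd to KVM so each signal injects GUEST_GSI. */
          irqfd.fd = trigger_fd;
          irqfd.gsi = GUEST_GSI;
          if (ioctl(vm_fd, KVM_IRQFD, &irqfd))
                  return -1;

          free(set);
          return 0;
  }

For a level-triggered interrupt the idea would be the same, except the
trigger eventfd gets registered with
PLAT_IRQ_FORWARD_SET_LEVEL_TRIGGER_EVENTFD, and a second eventfd --
registered with KVM as a resamplefd (KVM_IRQFD_FLAG_RESAMPLE) and with
this module via PLAT_IRQ_FORWARD_SET_LEVEL_UNMASK_EVENTFD -- re-enables
the host IRQ once the guest has handled it, similar to how VFIO handles
INTx.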

Signed-off-by: Micah Morton <mortonm at chromium.org>
---
 include/linux/miscdevice.h          |   1 +
 include/linux/plat_irqfd.h          |  20 ++
 include/uapi/linux/platirqforward.h |  55 ++++++
 virt/lib/Kconfig                    |   3 +
 virt/lib/Makefile                   |   1 +
 virt/lib/plat_irqfd.c               | 146 ++++++++++++++
 virt/lib/platirqforward.c           | 289 ++++++++++++++++++++++++++++
 7 files changed, 515 insertions(+)
 create mode 100644 include/linux/plat_irqfd.h
 create mode 100644 include/uapi/linux/platirqforward.h
 create mode 100644 virt/lib/plat_irqfd.c
 create mode 100644 virt/lib/platirqforward.c

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index becde6981a95..a9866dc7e10a 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -39,6 +39,7 @@
 #define IRNET_MINOR		187
 #define D7S_MINOR		193
 #define VFIO_MINOR		196
+#define PLAT_IRQ_FORWARD_MINOR	197
 #define TUN_MINOR		200
 #define CUSE_MINOR		203
 #define MWAVE_MINOR		219	/* ACP/Mwave Modem */
diff --git a/include/linux/plat_irqfd.h b/include/linux/plat_irqfd.h
new file mode 100644
index 000000000000..936a79cc7f5e
--- /dev/null
+++ b/include/linux/plat_irqfd.h
@@ -0,0 +1,20 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef PLAT_IRQFD_H
+#define PLAT_IRQFD_H
+
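+/*
+ * One eventfd -> callback binding, modeled on VFIO's virqfd: when the
+ * eventfd is signaled, handler(NULL, data) is invoked from the wait-queue
+ * callback.  *pirqfd points back at the slot holding this structure so the
+ * release paths can clear it under a lock.
+ */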
+struct plat_irq_forward_irqfd {
+	struct eventfd_ctx	*eventfd;
+	int			(*handler)(void *, void *);
+	void			*data;
+	wait_queue_entry_t	wait;
+	poll_table		pt;
+	struct work_struct	shutdown;
+	struct plat_irq_forward_irqfd		**pirqfd;
+};
+
+int plat_irq_forward_irqfd_enable(int (*handler)(void *, void *), void *data, struct plat_irq_forward_irqfd **pirqfd, int fd);
+#endif /* PLAT_IRQFD_H */
diff --git a/include/uapi/linux/platirqforward.h b/include/uapi/linux/platirqforward.h
new file mode 100644
index 000000000000..4b26d4cd8957
--- /dev/null
+++ b/include/uapi/linux/platirqforward.h
@@ -0,0 +1,55 @@
+/*
+ * API definition for Platform IRQ Forwarding to KVM guests
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _UAPIPLATIRQFORWARD_H
+#define _UAPIPLATIRQFORWARD_H
+
+#include <linux/ioctl.h>
+
+#define PLAT_IRQ_FORWARD_API_VERSION	0
+
+#define PLAT_IRQ_FORWARD_TYPE       (';')
+#define PLAT_IRQ_FORWARD_BASE       100
+
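+/* Kernel-internal per-IRQ state for a forwarded edge-triggered interrupt. */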
+struct plat_irq_forward_edge_triggered {
+	struct eventfd_ctx *trigger;
+	uint32_t irq_num;
+	struct list_head list;
+};
+
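+/* Kernel-internal per-IRQ state for a forwarded level-triggered interrupt. */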
+struct plat_irq_forward_level_triggered {
+	struct eventfd_ctx *trigger;
+	struct plat_irq_forward_irqfd *unmask;
+	bool is_masked;
+	spinlock_t spinlock;
+	uint32_t irq_num;
+	struct list_head list;
+};
+
+/**
+ *
+ * Set masking and unmasking of interrupts.  Caller provides
+ * struct plat_irq_forward_set with all fields set.
+ *
+ */
+struct plat_irq_forward_set {
+	__u32	argsz;
+	__u32	action_flags;
+#define PLAT_IRQ_FORWARD_SET_LEVEL_TRIGGER_EVENTFD	(1 << 0)
+#define PLAT_IRQ_FORWARD_SET_LEVEL_UNMASK_EVENTFD	(1 << 1)
+#define PLAT_IRQ_FORWARD_SET_EDGE_TRIGGER		(1 << 2)
+	__u32	irq_number_host;
+	__u32	count;
+	__u8	eventfd[];
+};
+
+/* ---- IOCTLs for Platform IRQ Forwarding fd (/dev/plat-irq-forward) ---- */
+#define PLAT_IRQ_FORWARD_SET _IO(PLAT_IRQ_FORWARD_TYPE, PLAT_IRQ_FORWARD_BASE + 0)
+
+/* *********************************************************************** */
+
+#endif /* _UAPIPLATIRQFORWARD_H */
diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig
index 2d9523b7155e..68d911ec54cc 100644
--- a/virt/lib/Kconfig
+++ b/virt/lib/Kconfig
@@ -1,3 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config IRQ_BYPASS_MANAGER
 	tristate
+
+config PLAT_IRQ_FORWARD
+	tristate "Enable forwarding arbitrary platform IRQs to guest in KVM"
+	help
+	  Provide a /dev/plat-irq-forward device through which a VMM can
+	  request an arbitrary host IRQ and have it signal an eventfd, so
+	  the interrupt can be injected into a KVM guest (e.g. for platform
+	  devices behind a passed-through PCI bus controller).
diff --git a/virt/lib/Makefile b/virt/lib/Makefile
index bd7f9a78bb6b..633e793b19a5 100644
--- a/virt/lib/Makefile
+++ b/virt/lib/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o
+obj-$(CONFIG_PLAT_IRQ_FORWARD) += platirqforward.o plat_irqfd.o
diff --git a/virt/lib/plat_irqfd.c b/virt/lib/plat_irqfd.c
new file mode 100644
index 000000000000..1c9637f89db4
--- /dev/null
+++ b/virt/lib/plat_irqfd.c
@@ -0,0 +1,146 @@
+#include <linux/file.h>
+#include <linux/vfio.h>
+#include <linux/eventfd.h>
+#include <linux/slab.h>
+#include <uapi/linux/platirqforward.h>
+#include <linux/plat_irqfd.h>
+
+static DEFINE_SPINLOCK(plat_irqfd_lock);
+
+static void plat_irqfd_deactivate(struct plat_irq_forward_irqfd *plat_irqfd)
+{
+        /*
+         * VFIO uses a dedicated cleanup workqueue here; for this RFC just
+         * run the shutdown work on the system workqueue.
+         */
+        schedule_work(&plat_irqfd->shutdown);
+}
+
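+/*
+ * Wait-queue callback attached to the eventfd: runs when the eventfd is
+ * signaled (EPOLLIN) or closed (EPOLLHUP).
+ */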
+static int plat_irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
+{
+        struct plat_irq_forward_irqfd *plat_irqfd = container_of(wait, struct plat_irq_forward_irqfd, wait);
+        __poll_t flags = key_to_poll(key);
+
+        if (flags & EPOLLIN) {
+                /* An event has been signaled, call function */
+                if (!plat_irqfd->handler ||
+                     plat_irqfd->handler(NULL, plat_irqfd->data))
+                        printk(KERN_EMERG "handler failed\n");
+        }
+
+        if (flags & EPOLLHUP) {
+                unsigned long flags;
+                spin_lock_irqsave(&plat_irqfd_lock, flags);
+
+                /*
+                 * The eventfd is closing, if the plat_irqfd has not yet been
+                 * queued for release, as determined by testing whether the
+                 * plat_irqfd pointer to it is still valid, queue it now.  As
+                 * with kvm irqfds, we know we won't race against the plat_irqfd
+                 * going away because we hold the lock to get here.
+                 */
+                if (*(plat_irqfd->pirqfd) == plat_irqfd) {
+                        *(plat_irqfd->pirqfd) = NULL;
+                        plat_irqfd_deactivate(plat_irqfd);
+                }
+
+                spin_unlock_irqrestore(&plat_irqfd_lock, flags);
+        }
+
+        return 0;
+}
+
+
+static void plat_irqfd_ptable_queue_proc(struct file *file,
+                                     wait_queue_head_t *wqh, poll_table *pt)
+{
+        struct plat_irq_forward_irqfd *plat_irqfd = container_of(pt, struct plat_irq_forward_irqfd, pt);
+        add_wait_queue(wqh, &plat_irqfd->wait);
+}
+
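+/*
+ * Deferred teardown: detach from the eventfd's wait queue, drop our
+ * reference to it, and free the plat_irqfd.
+ */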
+static void plat_irqfd_shutdown(struct work_struct *work)
+{
+        struct plat_irq_forward_irqfd *plat_irqfd = container_of(work, struct plat_irq_forward_irqfd, shutdown);
+        u64 cnt;
+
+        eventfd_ctx_remove_wait_queue(plat_irqfd->eventfd, &plat_irqfd->wait, &cnt);
+        eventfd_ctx_put(plat_irqfd->eventfd);
+
+        kfree(plat_irqfd);
+}
+
+int plat_irq_forward_irqfd_enable(int (*handler)(void *, void *), void *data, struct plat_irq_forward_irqfd **pirqfd, int fd)
+{
+        struct fd irqfd;
+        struct eventfd_ctx *ctx;
+        struct plat_irq_forward_irqfd *plat_irqfd;
+        int ret = 0;
+        unsigned int events;
+
+        plat_irqfd = kzalloc(sizeof(*plat_irqfd), GFP_KERNEL);
+        if (!plat_irqfd)
+                return -ENOMEM;
+
+        plat_irqfd->pirqfd = pirqfd;
+        plat_irqfd->handler = handler;
+        plat_irqfd->data = data;
+
+        /* TODO: the shutdown/teardown path is still incomplete (see the note above). */
+        INIT_WORK(&plat_irqfd->shutdown, plat_irqfd_shutdown);
+
+        irqfd = fdget(fd);
+        if (!irqfd.file) {
+                ret = -EBADF;
+                goto err_fd;
+        }
+
+        ctx = eventfd_ctx_fileget(irqfd.file);
+        if (IS_ERR(ctx)) {
+                ret = PTR_ERR(ctx);
+                goto err_ctx;
+        }
+
+        plat_irqfd->eventfd = ctx;
+
+         // plat_irqfds can be released by closing the eventfd or directly
+         // through ioctl.  These are both done through a workqueue, so
+         // we update the pointer to the plat_irqfd under lock to avoid
+         // pushing multiple jobs to release the same plat_irqfd.
+        spin_lock_irq(&plat_irqfd_lock);
+
+        if (*pirqfd) {
+                printk(KERN_EMERG "pirqfd should be NULL. BUG!\n");
+                spin_unlock_irq(&plat_irqfd_lock);
+                ret = -EBUSY;
+                goto err_busy;
+        }
+        *pirqfd = plat_irqfd;
+
+        spin_unlock_irq(&plat_irqfd_lock);
+
+         // Install our own custom wake-up handling so we are notified via
+         // a callback whenever someone signals the underlying eventfd.
+        init_waitqueue_func_entry(&plat_irqfd->wait, plat_irqfd_wakeup);
+        init_poll_funcptr(&plat_irqfd->pt, plat_irqfd_ptable_queue_proc);
+
+        events = irqfd.file->f_op->poll(irqfd.file, &plat_irqfd->pt);
+
+         // Check if there was an event already pending on the eventfd
+         // before we registered and trigger it as if we didn't miss it.
+        if (events & EPOLLIN) {
+                if (!handler || handler(NULL, data))
+                        printk(KERN_EMERG "handler failed\n");
+        }
+
+         // Do not drop the file until the irqfd is fully initialized,
+         // otherwise we might race against the POLLHUP.
+        fdput(irqfd);
+
+        return 0;
+err_busy:
+        eventfd_ctx_put(ctx);
+err_ctx:
+        fdput(irqfd);
+err_fd:
+        kfree(plat_irqfd);
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(plat_irq_forward_irqfd_enable);
diff --git a/virt/lib/platirqforward.c b/virt/lib/platirqforward.c
new file mode 100644
index 000000000000..22ca24a27ad0
--- /dev/null
+++ b/virt/lib/platirqforward.c
@@ -0,0 +1,289 @@
+#include <linux/cdev.h>
+#include <linux/compat.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/vfio.h>
+#include <linux/eventfd.h>
+#include <linux/delay.h>
+#include <uapi/linux/platirqforward.h>
+#include <linux/plat_irqfd.h>
+
+#define VERSION	"0.1"
+#define AUTHOR	"Micah Morton <mortonm at chromium.org>"
+#define DESC	"Platform IRQ Forwarding"
+
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(AUTHOR);
+MODULE_DESCRIPTION(DESC);
+MODULE_ALIAS_MISCDEV(PLAT_IRQ_FORWARD_MINOR);
+MODULE_ALIAS("devname:plat-irq-forward");
+
+static LIST_HEAD(level_triggered_irqs);
+static LIST_HEAD(edge_triggered_irqs);
+
+
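+/*
+ * Runs when the unmask eventfd is signaled (e.g. once the guest has handled
+ * a level-triggered interrupt): re-enable the host IRQ that
+ * plat_irq_forward_handler_level() masked.
+ */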
+static int plat_irq_forward_unmask_handler_level(void *opaque, void *level)
+{
+	unsigned long flags;
+	struct plat_irq_forward_level_triggered *l = (struct plat_irq_forward_level_triggered *) level;
+
+	spin_lock_irqsave(&(l->spinlock), flags);
+	if (l->is_masked) {
+		enable_irq(l->irq_num);
+		l->is_masked = false;
+	}
+	spin_unlock_irqrestore(&(l->spinlock), flags);
+	return 0;
+}
+
+
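+/*
+ * Hard IRQ handler for a forwarded level-triggered interrupt: mask the host
+ * IRQ so it cannot re-fire, then signal the trigger eventfd so the VMM/KVM
+ * can inject the interrupt into the guest.  The IRQ stays masked until the
+ * unmask eventfd is signaled.
+ */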
+static irqreturn_t plat_irq_forward_handler_level(int irq, void *level)
+{
+	unsigned long flags;
+	int ret = IRQ_NONE;
+	struct plat_irq_forward_level_triggered *l = (struct plat_irq_forward_level_triggered *) level;
+	spin_lock_irqsave(&(l->spinlock), flags);
+
+	disable_irq_nosync(irq);
+	l->is_masked = true;
+	ret = IRQ_HANDLED;
+
+	spin_unlock_irqrestore(&(l->spinlock), flags);
+
+	if (ret == IRQ_HANDLED)
+	        eventfd_signal(l->trigger, 1);
+
+	return ret;
+}
+
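+/*
+ * Hard IRQ handler for a forwarded edge-triggered interrupt: just signal
+ * the trigger eventfd.
+ */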
+static irqreturn_t plat_irq_forward_handler_edge(int irq, void *edge)
+{
+        eventfd_signal(((struct plat_irq_forward_edge_triggered*)edge)->trigger, 1);
+
+        return IRQ_HANDLED;
+}
+
+static int plat_irq_forward_set_level_trigger(void *data, uint32_t irq_number_host, struct plat_irq_forward_level_triggered *level)
+{
+	int32_t fd;
+	struct eventfd_ctx *trigger;
+	int ret;
+
+	fd = *(int32_t *)data;
+
+	if (fd < 0) /* Disable only */
+	        return 0;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+	        return PTR_ERR(trigger);
+	}
+
+	level->trigger = trigger;
+	spin_lock_init(&(level->spinlock));
+
+	ret = request_irq(irq_number_host, plat_irq_forward_handler_level, 0, "level-triggered-irq", level);
+	if (ret) {
+	        level->trigger = NULL;
+	        eventfd_ctx_put(trigger);
+	        return ret;
+	}
+
+	return 0;
+
+}
+
+static int plat_irq_forward_set_level_unmask(void *data, struct plat_irq_forward_level_triggered *level)
+{
+	int32_t fd;
+	fd = *(int32_t *)data;
+
+	if (fd >= 0)
+		return plat_irq_forward_irqfd_enable(plat_irq_forward_unmask_handler_level, level, &(level->unmask), fd);
+	return -EINVAL;
+}
+
+static int plat_irq_forward_set_edge_trigger(void *data, uint32_t irq_number_host, struct plat_irq_forward_edge_triggered *edge)
+{
+	struct eventfd_ctx *trigger;
+	int ret;
+	int32_t fd;
+
+	fd = *(int32_t *)data;
+	if (fd < 0) /* Disable only */
+		return 0;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger))
+		return PTR_ERR(trigger);
+
+	edge->trigger = trigger;
+
+	ret = request_irq(irq_number_host, plat_irq_forward_handler_edge, IRQF_SHARED, "edge-triggered-irq", edge);
+	if (ret) {
+		edge->trigger = NULL;
+		eventfd_ctx_put(trigger);
+		return ret;
+	}
+
+	return 0;
+}
+
+
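+/*
+ * Helpers dispatched from plat_irq_forward_ioctl(), one per
+ * PLAT_IRQ_FORWARD_SET action flag.  The trigger variants allocate per-IRQ
+ * bookkeeping and link it on a global list; the unmask variant attaches an
+ * unmask eventfd to an already-registered level-triggered IRQ.
+ */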
+static int platform_set_irqs_ioctl_level_trigger(uint32_t irq_number_host, void *data)
+{
+        struct plat_irq_forward_level_triggered *level_irq = kzalloc(sizeof(struct plat_irq_forward_level_triggered), GFP_KERNEL);
+        if (!level_irq)
+                return -ENOMEM;
+        level_irq->trigger = NULL;
+        level_irq->irq_num = irq_number_host;
+        level_irq->unmask = NULL;
+        level_irq->is_masked = true;
+        list_add(&(level_irq->list), &level_triggered_irqs);
+
+        return plat_irq_forward_set_level_trigger(data, irq_number_host, level_irq);
+}
+
+static int platform_set_irqs_ioctl_level_unmask(uint32_t irq_number_host, void *data)
+{
+        struct list_head* position = NULL;
+        struct plat_irq_forward_level_triggered *level_irq = NULL;
+        // We must already have a trigger for the IRQ before we add an unmask
+        list_for_each(position, &level_triggered_irqs) {
+                level_irq = list_entry(position, struct plat_irq_forward_level_triggered, list);
+                if (level_irq->irq_num == irq_number_host)
+                        return plat_irq_forward_set_level_unmask(data, level_irq);
+        }
+
+        return -ENOENT;
+}
+
+static int platform_set_irqs_ioctl_edge_trigger(uint32_t irq_number_host, void *data)
+{
+        struct plat_irq_forward_edge_triggered *edge_irq = kzalloc(sizeof(struct plat_irq_forward_edge_triggered), GFP_KERNEL);
+        if (!edge_irq)
+                return -ENOMEM;
+        edge_irq->trigger = NULL;
+        edge_irq->irq_num = irq_number_host;
+        list_add(&(edge_irq->list), &edge_triggered_irqs);
+
+        return plat_irq_forward_set_edge_trigger(data, irq_number_host, edge_irq);
+}
+
+static int plat_irq_forward_ioctl(void *device_data, unsigned long arg)
+{
+	u8 *data = NULL;
+	unsigned long minsz;
+	struct plat_irq_forward_set hdr;
+	int ret;
+
+	minsz = offsetofend(struct plat_irq_forward_set, count);
+
+	if (copy_from_user(&hdr, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	data = memdup_user((void __user *)(arg + minsz), sizeof(int32_t));
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	switch (hdr.action_flags) {
+	case PLAT_IRQ_FORWARD_SET_LEVEL_TRIGGER_EVENTFD:
+		ret = platform_set_irqs_ioctl_level_trigger(hdr.irq_number_host, data);
+		break;
+	case PLAT_IRQ_FORWARD_SET_LEVEL_UNMASK_EVENTFD:
+		ret = platform_set_irqs_ioctl_level_unmask(hdr.irq_number_host, data);
+		break;
+	case PLAT_IRQ_FORWARD_SET_EDGE_TRIGGER:
+		ret = platform_set_irqs_ioctl_edge_trigger(hdr.irq_number_host, data);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	kfree(data);
+	return ret;
+}
+
+/**
+ * Platform IRQ Forwarding fd, /dev/plat-irq-forward
+ */
+static long plat_irq_forward_fops_unl_ioctl(struct file *filep,
+				unsigned int cmd, unsigned long arg)
+{
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case PLAT_IRQ_FORWARD_SET:
+		ret = (long) plat_irq_forward_ioctl(filep, arg);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long plat_irq_forward_fops_compat_ioctl(struct file *filep,
+				   unsigned int cmd, unsigned long arg)
+{
+	arg = (unsigned long)compat_ptr(arg);
+	return plat_irq_forward_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif	/* CONFIG_COMPAT */
+
+static int plat_irq_forward_fops_open(struct inode *inode, struct file *filep)
+{
+	return 0;
+}
+
+static int plat_irq_forward_fops_release(struct inode *inode, struct file *filep)
+{
+	return 0;
+}
+
+static const struct file_operations plat_irq_forward_fops = {
+	.owner		= THIS_MODULE,
+	.open		= plat_irq_forward_fops_open,
+	.release	= plat_irq_forward_fops_release,
+	.unlocked_ioctl	= plat_irq_forward_fops_unl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= plat_irq_forward_fops_compat_ioctl,
+#endif
+};
+
+static struct miscdevice plat_irq_forward_dev = {
+	.minor = PLAT_IRQ_FORWARD_MINOR,
+	.name = "plat-irq-forward",
+	.fops = &plat_irq_forward_fops,
+	.nodename = "plat-irq-forward",
+	.mode = S_IRUGO | S_IWUGO,
+};
+
+static int __init plat_irq_forward_init(void)
+{
+	int ret;
+
+	ret = misc_register(&plat_irq_forward_dev);
+	if (ret) {
+		pr_err("plat-irq-forward: misc device register failed\n");
+		return ret;
+	}
+
+	pr_info(DESC " version: " VERSION "\n");
+
+	return 0;
+}
+
+// TODO: cleanup/free/disconnect stuff
+static void __exit plat_irq_forward_cleanup(void)
+{
+	misc_deregister(&plat_irq_forward_dev);
+}
+
+module_init(plat_irq_forward_init);
+module_exit(plat_irq_forward_cleanup);
-- 
2.25.0.265.gbab2e86ba0-goog




