root/samples/vfio-mdev/mtty.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Mediated virtual PCI serial host device driver
 *
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *     Author: Neo Jia <cjia@nvidia.com>
 *             Kirti Wankhede <kwankhede@nvidia.com>
 *
 * Sample driver that creates mdev device that simulates serial port over PCI
 * card.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/serial.h>
#include <uapi/linux/serial_reg.h>
#include <linux/eventfd.h>
#include <linux/anon_inodes.h>

/*
 * #defines
 */

#define VERSION_STRING  "0.1"
#define DRIVER_AUTHOR   "NVIDIA Corporation"

#define MTTY_CLASS_NAME "mtty"

#define MTTY_NAME       "mtty"

#define MTTY_STRING_LEN         16

/* Sizes of the emulated PCI config space and of the BAR regions, in bytes */
#define MTTY_CONFIG_SPACE_SIZE  0xff
#define MTTY_IO_BAR_SIZE        0x8
#define MTTY_MMIO_BAR_SIZE      0x100000

/*
 * Store a 16/32-bit value at @addr. Note that despite the names these
 * perform a plain native-endian store; no byte swapping is done.
 * Arguments are parenthesized so that expressions such as "base + 1"
 * expand correctly.
 */
#define STORE_LE16(addr, val)   (*(u16 *)(addr) = (val))
#define STORE_LE32(addr, val)   (*(u32 *)(addr) = (val))

/* Depth of the loop back FIFO; must stay a power of two (see below) */
#define MAX_FIFO_SIZE   16

/* Advance a FIFO index by one slot, wrapping at MAX_FIFO_SIZE */
#define CIRCULAR_BUF_INC_IDX(idx)    \
                                ((idx) = ((idx) + 1) & (MAX_FIFO_SIZE - 1))

/*
 * A VFIO file offset encodes the region index in the bits at and above
 * MTTY_VFIO_PCI_OFFSET_SHIFT and the offset within that region below it.
 */
#define MTTY_VFIO_PCI_OFFSET_SHIFT   40

#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off)   \
                                ((off) >> MTTY_VFIO_PCI_OFFSET_SHIFT)
#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) \
                                ((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT)
#define MTTY_VFIO_PCI_OFFSET_MASK    \
                                (((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
/* Initial value of the available-ports counter (mdev_avail_ports) */
#define MAX_MTTYS       24

/*
 * Global Structures
 */

/*
 * Driver-wide state. Exactly one instance exists: the static object
 * mtty_dev defined together with the type.
 *
 * NOTE(review): the fields are initialised outside this part of the file;
 * the per-field notes below are inferred from the field types and should be
 * confirmed against the module init/exit code.
 */
static struct mtty_dev {
        dev_t           vd_devt;        /* presumably the char device number */
        struct class    *vd_class;      /* presumably the "mtty" class */
        struct cdev     vd_cdev;
        struct idr      vd_idr;
        struct device   dev;            /* presumably the parent device */
        struct mdev_parent parent;
} mtty_dev;

/*
 * Book-keeping for one VFIO PCI region of an mdev. Instances live in
 * mdev_state.region_info[], indexed by VFIO PCI region index.
 */
struct mdev_region_info {
        /* BAR base address as decoded from vconfig by mdev_read_base() */
        u64 start;
        u64 phys_start;
        /* region size; mdev_read_base() skips regions whose size is 0 */
        u32 size;
        u64 vfio_offset;
};

/*
 * Register names for the DEBUG_REGS trace messages in mdev_access(),
 * indexed by UART register offset (0-7). Two tables are needed because
 * some offsets name a different register for writes than for reads
 * (offset 0: TX vs RX, offset 2: FCR vs IIR).
 */
#if defined(DEBUG_REGS)
/* names as seen by a write access */
static const char *wr_reg[] = {
        "TX",
        "IER",
        "FCR",
        "LCR",
        "MCR",
        "LSR",
        "MSR",
        "SCR"
};

/* names as seen by a read access */
static const char *rd_reg[] = {
        "RX",
        "IER",
        "IIR",
        "LCR",
        "MCR",
        "LSR",
        "MSR",
        "SCR"
};
#endif

/*
 * Loop back buffer: bytes written to the TX register are queued here and
 * returned by later reads of the RX register. Accesses in the register
 * handlers are made with mdev_state.rxtx_lock held.
 */
struct rxtx {
        u8 fifo[MAX_FIFO_SIZE];
        /* head: next slot to write; tail: next slot to read (both wrap) */
        u8 head, tail;
        /* number of bytes currently queued */
        u8 count;
};

/*
 * Emulated state of one UART port.
 *
 * This structure is copied byte-for-byte into and out of the migration
 * data (see mtty_save_state() and mtty_load_state()), so its layout is
 * part of the migration stream format.
 */
struct serial_port {
        u8 uart_reg[8];         /* 8 registers */
        struct rxtx rxtx;       /* loop back buffer */
        bool dlab;              /* divisor latch access bit is set in LCR */
        bool overrun;           /* a TX write found the FIFO full */
        u16 divisor;            /* divisor latch, accessed while dlab is set */
        u8 fcr;                 /* FIFO control register */
        u8 max_fifo_size;       /* usable FIFO depth: MAX_FIFO_SIZE or 1 */
        u8 intr_trigger_level;  /* interrupt trigger level */
};

/*
 * Migration stream layout: a fixed header followed by one serial_port
 * record per port. The header (everything before @ports) is filled in by
 * mtty_save_device_data(); the port records are appended by
 * mtty_save_state(). mtty_data_size() gives the full expected length for a
 * given device.
 */
struct mtty_data {
        u64 magic;
#define MTTY_MAGIC 0x7e9d09898c3e2c4e /* Nothing clever, just random */
        u32 major_ver;
#define MTTY_MAJOR_VER 1
        u32 minor_ver;
#define MTTY_MINOR_VER 0
        u32 nr_ports;           /* number of valid entries in @ports */
        u32 flags;              /* not written by the save path shown here */
        struct serial_port ports[2];
};

struct mdev_state;

/*
 * State behind one migration data file (saving or resuming side).
 * Allocated together with its anonymous-inode file and freed from the
 * file's release handler, mtty_release_migf().
 */
struct mtty_migration_file {
        struct file *filp;              /* the file this object backs */
        struct mutex lock;              /* taken around accesses to the fields below */
        struct mdev_state *mdev_state;  /* owning device */
        struct mtty_data data;          /* staged migration stream */
        ssize_t filled_size;            /* number of valid bytes in @data */
        u8 disabled:1;                  /* set by mtty_disable_file(); I/O then fails */
};

/*
 * State of each mdev device.
 *
 * Locking as visible in this file: @ops_lock is held across every region
 * access in mdev_access() and is required by mtty_trigger_interrupt();
 * @rxtx_lock is taken around loop back FIFO accesses in the register
 * handlers; @state_mutex and @reset_mutex cooperate through
 * mtty_state_mutex_unlock().
 */
struct mdev_state {
        struct vfio_device vdev;
        struct eventfd_ctx *intx_evtfd; /* signalled for INTx, may be NULL */
        struct eventfd_ctx *msi_evtfd;  /* signalled for MSI, may be NULL */
        int irq_index;                  /* selected VFIO_PCI_*_IRQ_INDEX */
        u8 *vconfig;                    /* emulated PCI config space */
        struct mutex ops_lock;
        struct mdev_device *mdev;
        struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
        u32 bar_mask[VFIO_PCI_NUM_REGIONS]; /* value returned on BAR sizing */
        struct list_head next;
        struct serial_port s[2];        /* emulated ports, indexed by BAR */
        struct mutex rxtx_lock;
        struct vfio_device_info dev_info;
        int nr_ports;                   /* number of ports in use: 1 or 2 */
        enum vfio_device_mig_state state; /* current migration state */
        struct mutex state_mutex;
        struct mutex reset_mutex;
        struct mtty_migration_file *saving_migf;
        struct mtty_migration_file *resuming_migf;
        u8 deferred_reset:1;            /* see mtty_state_mutex_unlock() */
        u8 intx_mask:1;                 /* set once INTx has been signalled */
};

/*
 * Supported mdev types: each entry pairs an mdev_type with the number of
 * serial ports a device of that type has. Two types are defined, a single
 * port and a dual port device, with sysfs names "1" and "2".
 */
static struct mtty_type {
        struct mdev_type type;
        int nr_ports;
} mtty_types[2] = {
        { .nr_ports = 1, .type.sysfs_name = "1",
          .type.pretty_name = "Single port serial" },
        { .nr_ports = 2, .type.sysfs_name = "2",
          .type.pretty_name = "Dual port serial" },
};

/*
 * Pointers to the mdev_type embedded in each mtty_types[] entry, in the
 * same order as mtty_types[].
 */
static struct mdev_type *mtty_mdev_types[] = {
        &mtty_types[0].type,
        &mtty_types[1].type,
};

/* Counter of serial ports still available; starts at MAX_MTTYS. */
static atomic_t mdev_avail_ports = ATOMIC_INIT(MAX_MTTYS);

/* File operations with only the owner set; no handlers are provided. */
static const struct file_operations vd_fops = {
        .owner          = THIS_MODULE,
};

/* Forward declaration; the initialised definition is not in this part of the file. */
static const struct vfio_device_ops mtty_dev_ops;

/* Helper functions */

/*
 * dump_buffer() - hex dump an access buffer to the kernel log.
 * @buf:   bytes to dump
 * @count: number of bytes at @buf
 *
 * Only active when DEBUG is defined; otherwise this is an empty function.
 *
 * The dump is emitted with print_hex_dump(), 16 bytes per line. Printing
 * each byte with its own pr_info() call would start a new log record per
 * byte, and an "int" loop index would be compared against the unsigned
 * @count.
 */
static void dump_buffer(u8 *buf, uint32_t count)
{
#if defined(DEBUG)
        pr_info("Buffer:\n");
        print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, count,
                       false);
#endif
}

/* Return true when the device's selected interrupt index is INTx. */
static bool is_intx(struct mdev_state *mdev_state)
{
        const int selected = mdev_state->irq_index;

        return selected == VFIO_PCI_INTX_IRQ_INDEX;
}

/* Return true when the device's selected interrupt index is MSI. */
static bool is_msi(struct mdev_state *mdev_state)
{
        const int selected = mdev_state->irq_index;

        return selected == VFIO_PCI_MSI_IRQ_INDEX;
}

/* Return true when neither INTx nor MSI is the selected interrupt index. */
static bool is_noirq(struct mdev_state *mdev_state)
{
        return !(is_intx(mdev_state) || is_msi(mdev_state));
}

/*
 * mtty_trigger_interrupt() - signal the eventfd of the selected IRQ type.
 * @mdev_state: device to raise the interrupt for
 *
 * Must be called with ops_lock held. For MSI the eventfd is signalled
 * whenever one is set. For INTx the eventfd is signalled only while
 * intx_mask is clear, and intx_mask is then set so that further triggers
 * are suppressed until something else clears it. Nothing happens when
 * neither interrupt type is selected.
 */
static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
{
        lockdep_assert_held(&mdev_state->ops_lock);

        if (is_msi(mdev_state)) {
                struct eventfd_ctx *msi = mdev_state->msi_evtfd;

                if (msi)
                        eventfd_signal(msi);
                return;
        }

        if (!is_intx(mdev_state))
                return;

        if (!mdev_state->intx_evtfd || mdev_state->intx_mask)
                return;

        eventfd_signal(mdev_state->intx_evtfd);
        mdev_state->intx_mask = true;
}

static void mtty_create_config_space(struct mdev_state *mdev_state)
{
        /* PCI dev ID */
        STORE_LE32((u32 *) &mdev_state->vconfig[0x0], 0x32534348);

        /* Control: I/O+, Mem-, BusMaster- */
        STORE_LE16((u16 *) &mdev_state->vconfig[0x4], 0x0001);

        /* Status: capabilities list absent */
        STORE_LE16((u16 *) &mdev_state->vconfig[0x6], 0x0200);

        /* Rev ID */
        mdev_state->vconfig[0x8] =  0x10;

        /* programming interface class : 16550-compatible serial controller */
        mdev_state->vconfig[0x9] =  0x02;

        /* Sub class : 00 */
        mdev_state->vconfig[0xa] =  0x00;

        /* Base class : Simple Communication controllers */
        mdev_state->vconfig[0xb] =  0x07;

        /* base address registers */
        /* BAR0: IO space */
        STORE_LE32((u32 *) &mdev_state->vconfig[0x10], 0x000001);
        mdev_state->bar_mask[0] = ~(MTTY_IO_BAR_SIZE) + 1;

        if (mdev_state->nr_ports == 2) {
                /* BAR1: IO space */
                STORE_LE32((u32 *) &mdev_state->vconfig[0x14], 0x000001);
                mdev_state->bar_mask[1] = ~(MTTY_IO_BAR_SIZE) + 1;
        }

        /* Subsystem ID */
        STORE_LE32((u32 *) &mdev_state->vconfig[0x2c], 0x32534348);

        mdev_state->vconfig[0x34] =  0x00;   /* Cap Ptr */
        mdev_state->vconfig[0x3d] =  0x01;   /* interrupt pin (INTA#) */

        /* Vendor specific data */
        mdev_state->vconfig[0x40] =  0x23;
        mdev_state->vconfig[0x43] =  0x80;
        mdev_state->vconfig[0x44] =  0x23;
        mdev_state->vconfig[0x48] =  0x23;
        mdev_state->vconfig[0x4c] =  0x23;

        mdev_state->vconfig[0x60] =  0x50;
        mdev_state->vconfig[0x61] =  0x43;
        mdev_state->vconfig[0x62] =  0x49;
        mdev_state->vconfig[0x63] =  0x20;
        mdev_state->vconfig[0x64] =  0x53;
        mdev_state->vconfig[0x65] =  0x65;
        mdev_state->vconfig[0x66] =  0x72;
        mdev_state->vconfig[0x67] =  0x69;
        mdev_state->vconfig[0x68] =  0x61;
        mdev_state->vconfig[0x69] =  0x6c;
        mdev_state->vconfig[0x6a] =  0x2f;
        mdev_state->vconfig[0x6b] =  0x55;
        mdev_state->vconfig[0x6c] =  0x41;
        mdev_state->vconfig[0x6d] =  0x52;
        mdev_state->vconfig[0x6e] =  0x54;
}

/*
 * handle_pci_cfg_write() - emulate a write to the PCI config space.
 * @mdev_state: device being accessed
 * @offset:     config space offset of the access
 * @buf:        data being written
 * @count:      number of valid bytes at @buf
 *
 * Only accesses that start exactly at a handled register offset have an
 * effect; everything else is logged and ignored.
 *
 * A BAR write combines the written bytes with the current register
 * contents instead of unconditionally reading four bytes from @buf, so a
 * one or two byte write no longer reads past the end of the caller's
 * buffer. Four byte writes behave exactly as before.
 */
static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
                                 u8 *buf, u32 count)
{
        u32 cfg_addr, bar_index;

        switch (offset) {
        case 0x04: /* device control */
        case 0x06: /* device status */
                /* do nothing */
                break;
        case 0x3c:  /* interrupt line */
                mdev_state->vconfig[0x3c] = buf[0];
                break;
        case 0x3d:
                /*
                 * Interrupt Pin is hardwired to INTA.
                 * This field is write protected by hardware
                 */
                break;
        case 0x10:  /* BAR0 */
        case 0x14:  /* BAR1 */
                bar_index = (offset == 0x10) ? 0 : 1;

                /* the single port device has no BAR1: it reads as zero */
                if ((mdev_state->nr_ports == 1) && (bar_index == 1)) {
                        STORE_LE32(&mdev_state->vconfig[offset], 0);
                        break;
                }

                /*
                 * Overlay the bytes actually supplied on top of the
                 * current register value; never read more than @count
                 * bytes from @buf.
                 */
                cfg_addr = *(u32 *)&mdev_state->vconfig[offset];
                memcpy(&cfg_addr, buf, min_t(u32, count, sizeof(cfg_addr)));
                pr_info("BAR%d addr 0x%x\n", bar_index, cfg_addr);

                /* all ones is the BAR sizing probe: answer with the mask */
                if (cfg_addr == 0xffffffff)
                        cfg_addr &= mdev_state->bar_mask[bar_index];

                /* the two low type bits of the BAR are read-only */
                cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul);
                STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
                break;
        case 0x18:  /* BAR2 */
        case 0x1c:  /* BAR3 */
        case 0x20:  /* BAR4 */
                /* unimplemented BARs always read as zero */
                STORE_LE32(&mdev_state->vconfig[offset], 0);
                break;
        default:
                pr_info("PCI config write @0x%x of %d bytes not handled\n",
                        offset, count);
                break;
        }
}

/*
 * handle_bar_write() - emulate a guest write to one UART register.
 * @index:      port number, used to index mdev_state->s[]
 * @mdev_state: device being accessed
 * @offset:     UART register offset within the BAR
 * @buf:        data written; only the first byte is used
 * @count:      access size (not used here)
 *
 * Interrupts are raised through mtty_trigger_interrupt(), which requires
 * the caller to hold ops_lock. Writes to unknown offsets are ignored.
 */
static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
                                u16 offset, u8 *buf, u32 count)
{
        struct serial_port *port = &mdev_state->s[index];
        struct rxtx *lb = &port->rxtx;
        const u8 data = *buf;

        switch (offset) {
        case UART_TX:
                /* with DLAB set this offset is the divisor LSB */
                if (port->dlab) {
                        port->divisor |= data;
                        break;
                }

                mutex_lock(&mdev_state->rxtx_lock);

                if (lb->count >= port->max_fifo_size) {
                        /* no room left: record the overrun */
#if defined(DEBUG_INTR)
                        pr_err("Serial port %d: Buffer Overflow\n", index);
#endif
                        port->overrun = true;

                        /*
                         * Raise an interrupt if the receiver line status
                         * interrupt is enabled.
                         */
                        if (port->uart_reg[UART_IER] & UART_IER_RLSI)
                                mtty_trigger_interrupt(mdev_state);
                } else {
                        /* queue the byte in the loop back FIFO */
                        lb->fifo[lb->head] = data;
                        lb->count++;
                        CIRCULAR_BUF_INC_IDX(lb->head);
                        port->overrun = false;

                        /*
                         * Raise an interrupt if the receive data interrupt
                         * is enabled and the FIFO just reached the trigger
                         * level.
                         */
                        if ((port->uart_reg[UART_IER] & UART_IER_RDI) &&
                            (lb->count == port->intr_trigger_level)) {
#if defined(DEBUG_INTR)
                                pr_err("Serial port %d: Fifo level trigger\n",
                                        index);
#endif
                                mtty_trigger_interrupt(mdev_state);
                        }
                }

                mutex_unlock(&mdev_state->rxtx_lock);
                break;

        case UART_IER:
                /* with DLAB set this offset is the divisor MSB */
                if (port->dlab) {
                        port->divisor |= (u16)data << 8;
                        break;
                }

                port->uart_reg[offset] = data;

                mutex_lock(&mdev_state->rxtx_lock);
                /* enabling THRI while the FIFO is empty fires right away */
                if ((data & UART_IER_THRI) && (lb->head == lb->tail)) {
#if defined(DEBUG_INTR)
                        pr_err("Serial port %d: IER_THRI write\n",
                                index);
#endif
                        mtty_trigger_interrupt(mdev_state);
                }
                mutex_unlock(&mdev_state->rxtx_lock);
                break;

        case UART_FCR:
                port->fcr = data;

                mutex_lock(&mdev_state->rxtx_lock);
                if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) {
                        /* either clear bit empties the shared FIFO */
                        lb->count = 0;
                        lb->head = 0;
                        lb->tail = 0;
                }
                mutex_unlock(&mdev_state->rxtx_lock);

                switch (data & UART_FCR_TRIGGER_MASK) {
                case UART_FCR_TRIGGER_1:
                        port->intr_trigger_level = 1;
                        break;

                case UART_FCR_TRIGGER_4:
                        port->intr_trigger_level = 4;
                        break;

                case UART_FCR_TRIGGER_8:
                        port->intr_trigger_level = 8;
                        break;

                case UART_FCR_TRIGGER_14:
                        port->intr_trigger_level = 14;
                        break;
                }

                /*
                 * The level decoded above is always overridden with 1.
                 * Honouring larger levels would need a timer with a
                 * timeout of 4 characters that sets "receive data
                 * timeout" in the IIR register when it expires.
                 */
                port->intr_trigger_level = 1;
                port->max_fifo_size = (data & UART_FCR_ENABLE_FIFO) ?
                                        MAX_FIFO_SIZE : 1;
                break;

        case UART_LCR:
                port->dlab = !!(data & UART_LCR_DLAB);
                /* setting DLAB restarts programming of the divisor */
                if (port->dlab)
                        port->divisor = 0;

                port->uart_reg[offset] = data;
                break;

        case UART_MCR:
                port->uart_reg[offset] = data;

                /* both conditions below can fire for the same write */
                if (port->uart_reg[UART_IER] & UART_IER_MSI) {
                        if (data & UART_MCR_OUT2) {
#if defined(DEBUG_INTR)
                                pr_err("Serial port %d: MCR_OUT2 write\n", index);
#endif
                                mtty_trigger_interrupt(mdev_state);
                        }

                        if (data & (UART_MCR_RTS | UART_MCR_DTR)) {
#if defined(DEBUG_INTR)
                                pr_err("Serial port %d: MCR RTS/DTR write\n", index);
#endif
                                mtty_trigger_interrupt(mdev_state);
                        }
                }
                break;

        case UART_LSR:
        case UART_MSR:
                /* read-only status registers: writes are ignored */
                break;

        case UART_SCR:
                port->uart_reg[offset] = data;
                break;

        default:
                break;
        }
}

/*
 * handle_bar_read() - emulate a guest read of one UART register.
 * @index:      port number, used to index mdev_state->s[]
 * @mdev_state: device being accessed
 * @offset:     UART register offset within the BAR
 * @buf:        output buffer of at least @count bytes
 * @count:      access size
 *
 * The register value is returned in the first byte of @buf. The whole
 * buffer is zeroed first, so a read of RX with an empty FIFO, a read of
 * an unknown offset and the bytes beyond the first of a wider access all
 * return 0 instead of whatever the caller's buffer happened to contain.
 *
 * Interrupts are raised through mtty_trigger_interrupt(), which requires
 * the caller to hold ops_lock.
 */
static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
                            u16 offset, u8 *buf, u32 count)
{
        struct serial_port *port = &mdev_state->s[index];
        struct rxtx *lb = &port->rxtx;

        /* never hand back bytes this function did not produce */
        memset(buf, 0, count);

        switch (offset) {
        case UART_RX:
                /* with DLAB set this offset is the divisor LSB */
                if (port->dlab) {
                        *buf = (u8)port->divisor;
                        break;
                }

                mutex_lock(&mdev_state->rxtx_lock);
                /* pop the oldest looped back byte, if there is one */
                if (lb->head != lb->tail) {
                        *buf = lb->fifo[lb->tail];
                        lb->count--;
                        CIRCULAR_BUF_INC_IDX(lb->tail);
                }

                if (lb->head == lb->tail) {
                        /*
                         * Raise an interrupt if the transmit buffer empty
                         * interrupt is enabled and the FIFO is empty.
                         */
#if defined(DEBUG_INTR)
                        pr_err("Serial port %d: Buffer Empty\n", index);
#endif
                        if (port->uart_reg[UART_IER] & UART_IER_THRI)
                                mtty_trigger_interrupt(mdev_state);
                }
                mutex_unlock(&mdev_state->rxtx_lock);
                break;

        case UART_IER:
                /* with DLAB set this offset is the divisor MSB */
                if (port->dlab) {
                        *buf = (u8)(port->divisor >> 8);
                        break;
                }
                *buf = port->uart_reg[offset] & 0x0f;
                break;

        case UART_IIR:
        {
                u8 ier = port->uart_reg[UART_IER];
                u8 iir = 0;

                mutex_lock(&mdev_state->rxtx_lock);
                /* Interrupt priority 1: Parity, overrun, framing or break */
                if ((ier & UART_IER_RLSI) && port->overrun)
                        iir |= UART_IIR_RLSI;

                /* Interrupt priority 2: FIFO trigger level reached */
                if ((ier & UART_IER_RDI) &&
                    (lb->count >= port->intr_trigger_level))
                        iir |= UART_IIR_RDI;

                /* Interrupt priority 3: transmitter holding register empty */
                if ((ier & UART_IER_THRI) && (lb->head == lb->tail))
                        iir |= UART_IIR_THRI;

                /* Interrupt priority 4: Modem status: CTS, DSR, RI or DCD */
                if ((ier & UART_IER_MSI) &&
                    (port->uart_reg[UART_MCR] &
                                 (UART_MCR_RTS | UART_MCR_DTR)))
                        iir |= UART_IIR_MSI;

                /* bit0: 0=> interrupt pending, 1=> no interrupt is pending */
                if (iir == 0)
                        iir = UART_IIR_NO_INT;

                /* set bit 6 & 7 to be 16550 compatible */
                iir |= 0xC0;
                mutex_unlock(&mdev_state->rxtx_lock);

                *buf = iir;
                break;
        }

        case UART_LCR:
        case UART_MCR:
                *buf = port->uart_reg[offset];
                break;

        case UART_LSR:
        {
                u8 lsr = 0;

                mutex_lock(&mdev_state->rxtx_lock);
                if (lb->head != lb->tail) {
                        /* at least one char in FIFO */
                        lsr |= UART_LSR_DR;
                } else {
                        /* transmit FIFO empty and transmitter empty */
                        lsr |= UART_LSR_TEMT | UART_LSR_THRE;
                }

                /* FIFO overrun */
                if (port->overrun)
                        lsr |= UART_LSR_OE;

                mutex_unlock(&mdev_state->rxtx_lock);
                *buf = lsr;
                break;
        }

        case UART_MSR:
        {
                u8 msr = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD;

                mutex_lock(&mdev_state->rxtx_lock);
                /*
                 * CTS is always reported unless auto flow control (AFE) is
                 * enabled and the FIFO has no space left.
                 */
                if (!(port->uart_reg[UART_MCR] & UART_MCR_AFE) ||
                    (lb->count < port->max_fifo_size))
                        msr |= UART_MSR_CTS | UART_MSR_DCTS;
                mutex_unlock(&mdev_state->rxtx_lock);

                *buf = msr;
                break;
        }

        case UART_SCR:
                *buf = port->uart_reg[offset];
                break;

        default:
                break;
        }
}

static void mdev_read_base(struct mdev_state *mdev_state)
{
        int index, pos;
        u32 start_lo, start_hi;
        u32 mem_type;

        pos = PCI_BASE_ADDRESS_0;

        for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {

                if (!mdev_state->region_info[index].size)
                        continue;

                start_lo = (*(u32 *)(mdev_state->vconfig + pos)) &
                        PCI_BASE_ADDRESS_MEM_MASK;
                mem_type = (*(u32 *)(mdev_state->vconfig + pos)) &
                        PCI_BASE_ADDRESS_MEM_TYPE_MASK;

                switch (mem_type) {
                case PCI_BASE_ADDRESS_MEM_TYPE_64:
                        start_hi = (*(u32 *)(mdev_state->vconfig + pos + 4));
                        pos += 4;
                        break;
                case PCI_BASE_ADDRESS_MEM_TYPE_32:
                case PCI_BASE_ADDRESS_MEM_TYPE_1M:
                        /* 1M mem BAR treated as 32-bit BAR */
                default:
                        /* mem unknown type treated as 32-bit BAR */
                        start_hi = 0;
                        break;
                }
                pos += 4;
                mdev_state->region_info[index].start = ((u64)start_hi << 32) |
                                                        start_lo;
        }
}

/*
 * mdev_access() - perform one read or write on a device region.
 * @mdev_state: device being accessed
 * @buf:        kernel buffer holding the data to write or receiving the
 *              data read; must hold at least @count bytes
 * @count:      number of bytes to transfer
 * @pos:        VFIO file offset: region index in the upper bits, offset
 *              within the region in the lower MTTY_VFIO_PCI_OFFSET_SHIFT
 *              bits
 * @is_write:   true for a write, false for a read
 *
 * The whole access runs with ops_lock held.
 *
 * Accesses are validated before they are dispatched:
 *  - config space accesses must lie entirely inside
 *    MTTY_CONFIG_SPACE_SIZE, otherwise the read would copy from beyond
 *    the emulated config space (assumes vconfig is MTTY_CONFIG_SPACE_SIZE
 *    bytes - confirm against its allocation);
 *  - BAR accesses must name a BAR that has a slot in mdev_state->s[] and
 *    an offset inside MTTY_IO_BAR_SIZE, otherwise s[] would be indexed out
 *    of bounds and the offset would be silently truncated to 16 bits.
 *
 * Return: @count on success, -EINVAL for a NULL @buf or an out-of-range
 * access, -1 for an unsupported region index.
 */
static ssize_t mdev_access(struct mdev_state *mdev_state, u8 *buf, size_t count,
                           loff_t pos, bool is_write)
{
        unsigned int index;
        loff_t offset;
        int ret = 0;

        if (!buf)
                return -EINVAL;

        mutex_lock(&mdev_state->ops_lock);

        index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
        offset = pos & MTTY_VFIO_PCI_OFFSET_MASK;
        switch (index) {
        case VFIO_PCI_CONFIG_REGION_INDEX:
                /* reject anything not fully inside the config space */
                if (offset >= MTTY_CONFIG_SPACE_SIZE ||
                    count > (size_t)(MTTY_CONFIG_SPACE_SIZE - offset)) {
                        ret = -EINVAL;
                        goto accessfailed;
                }

#if defined(DEBUG)
                pr_info("%s: PCI config space %s at offset 0x%llx\n",
                         __func__, is_write ? "write" : "read", offset);
#endif
                if (is_write) {
                        dump_buffer(buf, count);
                        handle_pci_cfg_write(mdev_state, offset, buf, count);
                } else {
                        memcpy(buf, (mdev_state->vconfig + offset), count);
                        dump_buffer(buf, count);
                }

                break;

        case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
                /*
                 * Only BARs backed by a serial_port slot can be emulated,
                 * and each of them exposes MTTY_IO_BAR_SIZE registers.
                 */
                if (index >= ARRAY_SIZE(mdev_state->s) ||
                    offset >= MTTY_IO_BAR_SIZE) {
                        ret = -EINVAL;
                        goto accessfailed;
                }

                /* decode the BAR base addresses on first use */
                if (!mdev_state->region_info[index].start)
                        mdev_read_base(mdev_state);

                if (is_write) {
                        dump_buffer(buf, count);

#if defined(DEBUG_REGS)
                        pr_info("%s: BAR%d  WR @0x%llx %s val:0x%02x dlab:%d\n",
                                __func__, index, offset, wr_reg[offset],
                                *buf, mdev_state->s[index].dlab);
#endif
                        handle_bar_write(index, mdev_state, offset, buf, count);
                } else {
                        handle_bar_read(index, mdev_state, offset, buf, count);
                        dump_buffer(buf, count);

#if defined(DEBUG_REGS)
                        pr_info("%s: BAR%d  RD @0x%llx %s val:0x%02x dlab:%d\n",
                                __func__, index, offset, rd_reg[offset],
                                *buf, mdev_state->s[index].dlab);
#endif
                }
                break;

        default:
                ret = -1;
                goto accessfailed;
        }

        ret = count;


accessfailed:
        mutex_unlock(&mdev_state->ops_lock);

        return ret;
}

/*
 * Size in bytes of a complete migration stream for this device: the
 * mtty_data header plus one serial_port record per port.
 */
static size_t mtty_data_size(struct mdev_state *mdev_state)
{
        const size_t header = offsetof(struct mtty_data, ports);
        const size_t per_port = sizeof(struct serial_port);

        return header + (mdev_state->nr_ports * per_port);
}

static void mtty_disable_file(struct mtty_migration_file *migf)
{
        mutex_lock(&migf->lock);
        migf->disabled = true;
        migf->filled_size = 0;
        migf->filp->f_pos = 0;
        mutex_unlock(&migf->lock);
}

static void mtty_disable_files(struct mdev_state *mdev_state)
{
        if (mdev_state->saving_migf) {
                mtty_disable_file(mdev_state->saving_migf);
                fput(mdev_state->saving_migf->filp);
                mdev_state->saving_migf = NULL;
        }

        if (mdev_state->resuming_migf) {
                mtty_disable_file(mdev_state->resuming_migf);
                fput(mdev_state->resuming_migf->filp);
                mdev_state->resuming_migf = NULL;
        }
}

/*
 * mtty_state_mutex_unlock() - release state_mutex, handling a deferred reset.
 * @mdev_state: device whose state_mutex is held by the caller
 *
 * Before state_mutex is dropped, any reset recorded in deferred_reset is
 * carried out here: the migration state goes back to
 * VFIO_DEVICE_STATE_RUNNING and the migration files are disabled. The
 * check is repeated until no deferred reset is pending, and the final
 * check and the unlock of state_mutex both happen under reset_mutex.
 *
 * NOTE(review): the code that sets deferred_reset is not in this part of
 * the file; presumably it is a reset path that could not take state_mutex.
 */
static void mtty_state_mutex_unlock(struct mdev_state *mdev_state)
{
again:
        mutex_lock(&mdev_state->reset_mutex);
        if (mdev_state->deferred_reset) {
                /* consume the request, then do the reset work without reset_mutex */
                mdev_state->deferred_reset = false;
                mutex_unlock(&mdev_state->reset_mutex);
                mdev_state->state = VFIO_DEVICE_STATE_RUNNING;
                mtty_disable_files(mdev_state);
                /* another reset may have been deferred meanwhile: re-check */
                goto again;
        }
        /* nothing pending: drop state_mutex while still holding reset_mutex */
        mutex_unlock(&mdev_state->state_mutex);
        mutex_unlock(&mdev_state->reset_mutex);
}

/*
 * Release handler of the migration files: disables the migration file
 * object stored in the file's private data, then destroys its lock and
 * frees it. Always returns 0.
 */
static int mtty_release_migf(struct inode *inode, struct file *filp)
{
        struct mtty_migration_file *dying = filp->private_data;

        mtty_disable_file(dying);

        mutex_destroy(&dying->lock);
        kfree(dying);
        return 0;
}

static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd,
                               unsigned long arg)
{
        struct mtty_migration_file *migf = filp->private_data;
        struct mdev_state *mdev_state = migf->mdev_state;
        loff_t *pos = &filp->f_pos;
        struct vfio_precopy_info info = {};
        unsigned long minsz;
        int ret;

        if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
                return -ENOTTY;

        minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);

        if (copy_from_user(&info, (void __user *)arg, minsz))
                return -EFAULT;
        if (info.argsz < minsz)
                return -EINVAL;

        mutex_lock(&mdev_state->state_mutex);
        if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY &&
            mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
                ret = -EINVAL;
                goto unlock;
        }

        mutex_lock(&migf->lock);

        if (migf->disabled) {
                mutex_unlock(&migf->lock);
                ret = -ENODEV;
                goto unlock;
        }

        if (*pos > migf->filled_size) {
                mutex_unlock(&migf->lock);
                ret = -EINVAL;
                goto unlock;
        }

        info.dirty_bytes = 0;
        info.initial_bytes = migf->filled_size - *pos;
        mutex_unlock(&migf->lock);

        ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
unlock:
        mtty_state_mutex_unlock(mdev_state);
        return ret;
}

/*
 * mtty_save_read() - read staged migration data from a saving file.
 * @filp: migration file being read
 * @buf:  user buffer to copy into
 * @len:  maximum number of bytes to copy
 * @pos:  must be NULL; the file is a stream and keeps its own position in
 *        filp->f_pos
 *
 * Copies at most @len of the staged bytes that lie beyond the current file
 * position and advances the position by the amount copied.
 *
 * Return: number of bytes copied (0 when nothing is left); -ESPIPE if a
 * position was supplied; -ENODEV if the file has been disabled; -EINVAL if
 * the file position is beyond the staged data; -EFAULT if the copy to user
 * space fails.
 */
static ssize_t mtty_save_read(struct file *filp, char __user *buf,
                              size_t len, loff_t *pos)
{
        struct mtty_migration_file *migf = filp->private_data;
        ssize_t ret = 0;

        if (pos)
                return -ESPIPE;

        pos = &filp->f_pos;

        mutex_lock(&migf->lock);

        dev_dbg(migf->mdev_state->vdev.dev, "%s ask %zu\n", __func__, len);

        if (migf->disabled) {
                ret = -ENODEV;
                goto out_unlock;
        }

        if (*pos > migf->filled_size) {
                ret = -EINVAL;
                goto out_unlock;
        }

        len = min_t(size_t, migf->filled_size - *pos, len);
        if (len) {
                if (copy_to_user(buf, (void *)&migf->data + *pos, len)) {
                        ret = -EFAULT;
                        goto out_unlock;
                }
                *pos += len;
                ret = len;
        }
out_unlock:
        /* ret is signed and may hold an error code, hence %zd */
        dev_dbg(migf->mdev_state->vdev.dev, "%s read %zd\n", __func__, ret);
        mutex_unlock(&migf->lock);
        return ret;
}

/*
 * File operations of the saving migration file created by
 * mtty_save_device_data(): it can be read, queried with the precopy
 * ioctl, and frees its backing object on release. No write handler is
 * provided.
 */
static const struct file_operations mtty_save_fops = {
        .owner = THIS_MODULE,
        .read = mtty_save_read,
        .unlocked_ioctl = mtty_precopy_ioctl,
        .compat_ioctl = compat_ptr_ioctl,
        .release = mtty_release_migf,
};

static void mtty_save_state(struct mdev_state *mdev_state)
{
        struct mtty_migration_file *migf = mdev_state->saving_migf;
        int i;

        mutex_lock(&migf->lock);
        for (i = 0; i < mdev_state->nr_ports; i++) {
                memcpy(&migf->data.ports[i],
                        &mdev_state->s[i], sizeof(struct serial_port));
                migf->filled_size += sizeof(struct serial_port);
        }
        dev_dbg(mdev_state->vdev.dev,
                "%s filled to %zu\n", __func__, migf->filled_size);
        mutex_unlock(&migf->lock);
}

/*
 * mtty_load_state() - apply received migration data to the device.
 * @mdev_state: device whose resuming migration file holds the data
 *
 * Copies one serial_port record per port from the staged data into the
 * device, under the migration file's lock. Magic and version are not
 * checked here; that is left to the resume write handler.
 *
 * Return: 0 on success, -EINVAL if fewer bytes than a complete stream for
 * this device have been received.
 */
static int mtty_load_state(struct mdev_state *mdev_state)
{
        struct mtty_migration_file *migf = mdev_state->resuming_migf;
        int ret = 0;
        int port;

        mutex_lock(&migf->lock);

        if (migf->filled_size < mtty_data_size(mdev_state)) {
                dev_dbg(mdev_state->vdev.dev, "%s expected %zu, got %zu\n",
                        __func__, mtty_data_size(mdev_state),
                        migf->filled_size);
                ret = -EINVAL;
                goto out_unlock;
        }

        for (port = 0; port < mdev_state->nr_ports; port++)
                memcpy(&mdev_state->s[port], &migf->data.ports[port],
                       sizeof(struct serial_port));

out_unlock:
        mutex_unlock(&migf->lock);
        return ret;
}

/*
 * Prepare the saving migration file for a transition into @state.
 *
 * If no saving file exists yet, one is allocated, its header (magic,
 * version, port count) is filled in and it is recorded in
 * mdev_state->saving_migf.  Whenever @state is STOP_COPY the port state
 * is appended via mtty_save_state(), for a new file and for an already
 * existing one alike.
 *
 * Returns the newly created migration file, NULL when an existing file
 * was reused, or an ERR_PTR() on allocation / file creation failure.
 */
static struct mtty_migration_file *
mtty_save_device_data(struct mdev_state *mdev_state,
                      enum vfio_device_mig_state state)
{
        struct mtty_migration_file *created = NULL;

        if (!mdev_state->saving_migf) {
                struct mtty_migration_file *migf;
                struct file *filp;

                migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
                if (!migf)
                        return ERR_PTR(-ENOMEM);

                filp = anon_inode_getfile("mtty_mig", &mtty_save_fops,
                                          migf, O_RDONLY);
                migf->filp = filp;
                if (IS_ERR(filp)) {
                        kfree(migf);
                        return ERR_CAST(filp);
                }

                stream_open(migf->filp->f_inode, migf->filp);
                mutex_init(&migf->lock);
                migf->mdev_state = mdev_state;

                /* Header first; the port data follows at STOP_COPY. */
                migf->data.magic = MTTY_MAGIC;
                migf->data.major_ver = MTTY_MAJOR_VER;
                migf->data.minor_ver = MTTY_MINOR_VER;
                migf->data.nr_ports = mdev_state->nr_ports;

                migf->filled_size = offsetof(struct mtty_data, ports);

                dev_dbg(mdev_state->vdev.dev, "%s filled header to %zu\n",
                        __func__, migf->filled_size);

                mdev_state->saving_migf = migf;
                created = migf;
        }

        if (state == VFIO_DEVICE_STATE_STOP_COPY)
                mtty_save_state(mdev_state);

        return created;
}

/*
 * write() handler of the resuming migration file.
 *
 * A non-NULL @pos is refused with -ESPIPE; the file's own f_pos is used
 * instead.  The incoming bytes are stored at that offset inside
 * migf->data.  Once the write that completes the header (everything
 * before the ports array) has landed, magic, flags, version and port
 * count are validated against this device.
 *
 * Returns the number of bytes accepted, or -EINVAL (negative or
 * overflowing offset), -ENOMEM (write would exceed mtty_data_size()),
 * -ENODEV (file disabled) or -EFAULT (user copy failed or header
 * validation failed).
 */
static ssize_t mtty_resume_write(struct file *filp, const char __user *buf,
                                 size_t len, loff_t *pos)
{
        const size_t hdr_len = offsetof(struct mtty_data, ports);
        struct mtty_migration_file *migf = filp->private_data;
        struct mdev_state *mdev_state = migf->mdev_state;
        loff_t end;
        ssize_t ret = 0;

        if (pos)
                return -ESPIPE;

        pos = &filp->f_pos;

        if (*pos < 0)
                return -EINVAL;
        if (check_add_overflow((loff_t)len, *pos, &end))
                return -EINVAL;

        if (end > mtty_data_size(mdev_state))
                return -ENOMEM;

        mutex_lock(&migf->lock);

        if (migf->disabled) {
                ret = -ENODEV;
                goto out_unlock;
        }

        if (copy_from_user((void *)&migf->data + *pos, buf, len)) {
                ret = -EFAULT;
                goto out_unlock;
        }

        *pos += len;
        ret = len;

        dev_dbg(migf->mdev_state->vdev.dev, "%s received %zu, total %zu\n",
                __func__, len, migf->filled_size + len);

        /* Validate only on the write that crosses the end of the header. */
        if (migf->filled_size < hdr_len &&
            migf->filled_size + len >= hdr_len) {
                bool header_ok = migf->data.magic == MTTY_MAGIC &&
                                 !migf->data.flags &&
                                 migf->data.major_ver == MTTY_MAJOR_VER &&
                                 migf->data.minor_ver == MTTY_MINOR_VER &&
                                 migf->data.nr_ports == mdev_state->nr_ports;

                if (header_ok) {
                        dev_dbg(migf->mdev_state->vdev.dev,
                                "%s header validated\n", __func__);
                } else {
                        dev_dbg(migf->mdev_state->vdev.dev,
                                "%s failed validation\n", __func__);
                        ret = -EFAULT;
                }
        }

        /* The byte count is accounted even when validation failed. */
        migf->filled_size += len;

out_unlock:
        mutex_unlock(&migf->lock);
        return ret;
}

/*
 * File operations of the anonymous "resuming" migration file created by
 * mtty_resume_device_data(): a write handler and a release handler.
 */
static const struct file_operations mtty_resume_fops = {
        .owner = THIS_MODULE,
        .write = mtty_resume_write,
        .release = mtty_release_migf,
};

/*
 * Allocate the resuming migration file, back it with a write-only
 * anonymous inode file and record it in mdev_state->resuming_migf.
 *
 * Returns the new migration file or an ERR_PTR() on failure.
 */
static struct mtty_migration_file *
mtty_resume_device_data(struct mdev_state *mdev_state)
{
        struct mtty_migration_file *migf;
        struct file *filp;

        migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
        if (!migf)
                return ERR_PTR(-ENOMEM);

        filp = anon_inode_getfile("mtty_mig", &mtty_resume_fops,
                                  migf, O_WRONLY);
        migf->filp = filp;
        if (IS_ERR(filp)) {
                kfree(migf);
                return ERR_CAST(filp);
        }

        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        migf->mdev_state = mdev_state;

        mdev_state->resuming_migf = migf;

        return migf;
}

/*
 * Get the saving migration file for the arc into @new.  When
 * mtty_save_device_data() hands back a file, an extra reference is taken
 * on it before it is returned; NULL and ERR_PTR() results are passed on.
 */
static struct file *mtty_step_save_file(struct mdev_state *mdev_state,
                                        enum vfio_device_mig_state new)
{
        struct mtty_migration_file *migf;

        migf = mtty_save_device_data(mdev_state, new);
        if (IS_ERR(migf))
                return ERR_CAST(migf);
        if (!migf)
                return NULL;

        get_file(migf->filp);
        return migf->filp;
}

/*
 * Create the resuming migration file and return it with an extra
 * reference taken, or pass on the ERR_PTR() from its creation.
 */
static struct file *mtty_step_resume_file(struct mdev_state *mdev_state)
{
        struct mtty_migration_file *migf;

        migf = mtty_resume_device_data(mdev_state);
        if (IS_ERR(migf))
                return ERR_CAST(migf);

        get_file(migf->filp);
        return migf->filp;
}

/*
 * Perform a single migration state arc from mdev_state->state to @new.
 *
 * Arcs fall into four groups:
 *   - no-ops, since mtty does not do DMA nor require any explicit
 *     start/stop:
 *         RUNNING -> RUNNING_P2P, RUNNING_P2P -> RUNNING,
 *         RUNNING_P2P -> STOP, PRE_COPY -> PRE_COPY_P2P,
 *         PRE_COPY_P2P -> PRE_COPY, STOP -> RUNNING_P2P
 *   - arcs that close the migration files (RESUMING -> STOP loads the
 *     received state first):
 *         RESUMING -> STOP, PRE_COPY -> RUNNING,
 *         PRE_COPY_P2P -> RUNNING_P2P, STOP_COPY -> STOP
 *   - arcs that go through the saving file:
 *         RUNNING -> PRE_COPY, RUNNING_P2P -> PRE_COPY_P2P,
 *         STOP -> STOP_COPY, PRE_COPY_P2P -> STOP_COPY
 *   - the arc that creates the resuming file:
 *         STOP -> RESUMING
 *
 * Returns a referenced file, NULL when the arc yields no file, or an
 * ERR_PTR().  Any other arc triggers a warning and yields -EINVAL.
 */
static struct file *mtty_step_state(struct mdev_state *mdev_state,
                                     enum vfio_device_mig_state new)
{
        enum vfio_device_mig_state cur = mdev_state->state;
        int ret;

        dev_dbg(mdev_state->vdev.dev, "%s: %d -> %d\n", __func__, cur, new);

        switch (cur) {
        case VFIO_DEVICE_STATE_RUNNING:
                if (new == VFIO_DEVICE_STATE_RUNNING_P2P)
                        return NULL;
                if (new == VFIO_DEVICE_STATE_PRE_COPY)
                        return mtty_step_save_file(mdev_state, new);
                break;

        case VFIO_DEVICE_STATE_RUNNING_P2P:
                if (new == VFIO_DEVICE_STATE_RUNNING ||
                    new == VFIO_DEVICE_STATE_STOP)
                        return NULL;
                if (new == VFIO_DEVICE_STATE_PRE_COPY_P2P)
                        return mtty_step_save_file(mdev_state, new);
                break;

        case VFIO_DEVICE_STATE_PRE_COPY:
                if (new == VFIO_DEVICE_STATE_PRE_COPY_P2P)
                        return NULL;
                if (new == VFIO_DEVICE_STATE_RUNNING) {
                        mtty_disable_files(mdev_state);
                        return NULL;
                }
                break;

        case VFIO_DEVICE_STATE_PRE_COPY_P2P:
                if (new == VFIO_DEVICE_STATE_PRE_COPY)
                        return NULL;
                if (new == VFIO_DEVICE_STATE_RUNNING_P2P) {
                        mtty_disable_files(mdev_state);
                        return NULL;
                }
                if (new == VFIO_DEVICE_STATE_STOP_COPY)
                        return mtty_step_save_file(mdev_state, new);
                break;

        case VFIO_DEVICE_STATE_STOP:
                if (new == VFIO_DEVICE_STATE_RUNNING_P2P)
                        return NULL;
                if (new == VFIO_DEVICE_STATE_STOP_COPY)
                        return mtty_step_save_file(mdev_state, new);
                if (new == VFIO_DEVICE_STATE_RESUMING)
                        return mtty_step_resume_file(mdev_state);
                break;

        case VFIO_DEVICE_STATE_STOP_COPY:
                if (new == VFIO_DEVICE_STATE_STOP) {
                        mtty_disable_files(mdev_state);
                        return NULL;
                }
                break;

        case VFIO_DEVICE_STATE_RESUMING:
                if (new == VFIO_DEVICE_STATE_STOP) {
                        /* The received state must be applied first. */
                        ret = mtty_load_state(mdev_state);
                        if (ret)
                                return ERR_PTR(ret);
                        mtty_disable_files(mdev_state);
                        return NULL;
                }
                break;

        default:
                break;
        }

        /* vfio_mig_get_next_state() does not use arcs other than the above */
        WARN_ON(true);
        return ERR_PTR(-EINVAL);
}

static struct file *mtty_set_state(struct vfio_device *vdev,
                                   enum vfio_device_mig_state new_state)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);
        struct file *ret = NULL;

        dev_dbg(vdev->dev, "%s -> %d\n", __func__, new_state);

        mutex_lock(&mdev_state->state_mutex);
        while (mdev_state->state != new_state) {
                enum vfio_device_mig_state next_state;
                int rc = vfio_mig_get_next_state(vdev, mdev_state->state,
                                                 new_state, &next_state);
                if (rc) {
                        ret = ERR_PTR(rc);
                        break;
                }

                ret = mtty_step_state(mdev_state, next_state);
                if (IS_ERR(ret))
                        break;

                mdev_state->state = next_state;

                if (WARN_ON(ret && new_state != next_state)) {
                        fput(ret);
                        ret = ERR_PTR(-EINVAL);
                        break;
                }
        }
        mtty_state_mutex_unlock(mdev_state);
        return ret;
}

static int mtty_get_state(struct vfio_device *vdev,
                          enum vfio_device_mig_state *current_state)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);

        mutex_lock(&mdev_state->state_mutex);
        *current_state = mdev_state->state;
        mtty_state_mutex_unlock(mdev_state);
        return 0;
}

/*
 * migration_get_data_size callback: store mtty_data_size() for this
 * device in @stop_copy_length.  Always returns 0.
 */
static int mtty_get_data_size(struct vfio_device *vdev,
                              unsigned long *stop_copy_length)
{
        struct mdev_state *ms = container_of(vdev, struct mdev_state, vdev);
        unsigned long bytes = mtty_data_size(ms);

        *stop_copy_length = bytes;

        return 0;
}

/* Migration callbacks; installed as vdev->mig_ops by mtty_init_dev(). */
static const struct vfio_migration_ops mtty_migration_ops = {
        .migration_set_state = mtty_set_state,
        .migration_get_state = mtty_get_state,
        .migration_get_data_size = mtty_get_data_size,
};

/* log_start callback: nothing to do, all arguments are ignored; returns 0. */
static int mtty_log_start(struct vfio_device *vdev,
                          struct rb_root_cached *ranges,
                          u32 nnodes, u64 *page_size)
{
        return 0;
}

/* log_stop callback: nothing to do; returns 0. */
static int mtty_log_stop(struct vfio_device *vdev)
{
        return 0;
}

/*
 * log_read_and_clear callback: @dirty is left untouched and 0 is
 * returned.
 */
static int mtty_log_read_and_clear(struct vfio_device *vdev,
                                   unsigned long iova, unsigned long length,
                                   struct iova_bitmap *dirty)
{
        return 0;
}

/*
 * Logging callbacks, all of them no-op stubs; installed as
 * vdev->log_ops by mtty_init_dev().
 */
static const struct vfio_log_ops mtty_log_ops = {
        .log_start = mtty_log_start,
        .log_stop = mtty_log_stop,
        .log_read_and_clear = mtty_log_read_and_clear,
};

static int mtty_init_dev(struct vfio_device *vdev)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);
        struct mdev_device *mdev = to_mdev_device(vdev->dev);
        struct mtty_type *type =
                container_of(mdev->type, struct mtty_type, type);
        int avail_ports = atomic_read(&mdev_avail_ports);
        int ret;

        do {
                if (avail_ports < type->nr_ports)
                        return -ENOSPC;
        } while (!atomic_try_cmpxchg(&mdev_avail_ports,
                                     &avail_ports,
                                     avail_ports - type->nr_ports));

        mdev_state->nr_ports = type->nr_ports;
        mdev_state->irq_index = -1;
        mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE;
        mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE;
        mutex_init(&mdev_state->rxtx_lock);

        mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
        if (!mdev_state->vconfig) {
                ret = -ENOMEM;
                goto err_nr_ports;
        }

        mutex_init(&mdev_state->ops_lock);
        mdev_state->mdev = mdev;
        mtty_create_config_space(mdev_state);

        mutex_init(&mdev_state->state_mutex);
        mutex_init(&mdev_state->reset_mutex);
        vdev->migration_flags = VFIO_MIGRATION_STOP_COPY |
                                VFIO_MIGRATION_P2P |
                                VFIO_MIGRATION_PRE_COPY;
        vdev->mig_ops = &mtty_migration_ops;
        vdev->log_ops = &mtty_log_ops;
        mdev_state->state = VFIO_DEVICE_STATE_RUNNING;

        return 0;

err_nr_ports:
        atomic_add(type->nr_ports, &mdev_avail_ports);
        return ret;
}

/*
 * mdev driver probe: allocate the vfio device embedding struct
 * mdev_state, register it as an emulated-IOMMU device and store it as
 * the mdev's driver data.
 *
 * Returns 0 on success or a negative errno; on registration failure the
 * device reference obtained from the allocation is dropped.
 */
static int mtty_probe(struct mdev_device *mdev)
{
        struct mdev_state *mdev_state;
        int err;

        mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
                                       &mtty_dev_ops);
        if (IS_ERR(mdev_state))
                return PTR_ERR(mdev_state);

        err = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
        if (err) {
                vfio_put_device(&mdev_state->vdev);
                return err;
        }

        dev_set_drvdata(&mdev->dev, mdev_state);

        return 0;
}

/*
 * vfio_device_ops.release callback: destroy the reset and state
 * mutexes, return this device's ports to mdev_avail_ports and free the
 * virtual config space.
 */
static void mtty_release_dev(struct vfio_device *vdev)
{
        struct mdev_state *ms = container_of(vdev, struct mdev_state, vdev);

        mutex_destroy(&ms->reset_mutex);
        mutex_destroy(&ms->state_mutex);

        atomic_add(ms->nr_ports, &mdev_avail_ports);

        kfree(ms->vconfig);
}

/*
 * mdev driver remove: unregister the vfio device stored in the mdev's
 * driver data, then drop a reference on it.
 */
static void mtty_remove(struct mdev_device *mdev)
{
        struct mdev_state *ms = dev_get_drvdata(&mdev->dev);
        struct vfio_device *vdev = &ms->vdev;

        vfio_unregister_group_dev(vdev);
        vfio_put_device(vdev);
}

static int mtty_reset(struct mdev_state *mdev_state)
{
        pr_info("%s: called\n", __func__);

        mutex_lock(&mdev_state->reset_mutex);
        mdev_state->deferred_reset = true;
        if (!mutex_trylock(&mdev_state->state_mutex)) {
                mutex_unlock(&mdev_state->reset_mutex);
                return 0;
        }
        mutex_unlock(&mdev_state->reset_mutex);
        mtty_state_mutex_unlock(mdev_state);

        return 0;
}

/*
 * vfio_device_ops.read callback.
 *
 * The request is split into accesses of 4, 2 or 1 bytes: the widest one
 * that still fits in the remaining @count and is naturally aligned at
 * *@ppos.  Each piece is fetched with mdev_access() and copied to
 * @buf; *@ppos advances as pieces complete.
 *
 * Returns the number of bytes read, or -EFAULT as soon as either
 * mdev_access() (result <= 0) or the copy to user space fails.
 */
static ssize_t mtty_read(struct vfio_device *vdev, char __user *buf,
                         size_t count, loff_t *ppos)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);
        unsigned int done = 0;

        while (count) {
                size_t width;
                u32 val;
                int ret;

                if (count >= 4 && !(*ppos % 4))
                        width = 4;
                else if (count >= 2 && !(*ppos % 2))
                        width = 2;
                else
                        width = 1;

                /* Only the first @width bytes of val are used. */
                ret = mdev_access(mdev_state, (u8 *)&val, width,
                                  *ppos, false);
                if (ret <= 0)
                        return -EFAULT;

                if (copy_to_user(buf, &val, width))
                        return -EFAULT;

                buf += width;
                *ppos += width;
                done += width;
                count -= width;
        }

        return done;
}

/*
 * vfio_device_ops.write callback.
 *
 * The request is split into accesses of 4, 2 or 1 bytes: the widest one
 * that still fits in the remaining @count and is naturally aligned at
 * *@ppos.  Each piece is copied in from @buf and handed to
 * mdev_access(); *@ppos advances as pieces complete.
 *
 * Returns the number of bytes written, or -EFAULT as soon as either the
 * copy from user space or mdev_access() (result <= 0) fails.
 */
static ssize_t mtty_write(struct vfio_device *vdev, const char __user *buf,
                   size_t count, loff_t *ppos)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);
        unsigned int done = 0;

        while (count) {
                size_t width;
                u32 val;
                int ret;

                if (count >= 4 && !(*ppos % 4))
                        width = 4;
                else if (count >= 2 && !(*ppos % 2))
                        width = 2;
                else
                        width = 1;

                /* Only the first @width bytes of val are filled in. */
                if (copy_from_user(&val, buf, width))
                        return -EFAULT;

                ret = mdev_access(mdev_state, (u8 *)&val, width,
                                  *ppos, true);
                if (ret <= 0)
                        return -EFAULT;

                buf += width;
                *ppos += width;
                done += width;
                count -= width;
        }

        return done;
}

/*
 * Drop the INTx eventfd, if one is installed, and reset the INTx mask
 * and irq_index along with it.  Does nothing when no INTx eventfd is
 * set.
 */
static void mtty_disable_intx(struct mdev_state *mdev_state)
{
        if (!mdev_state->intx_evtfd)
                return;

        eventfd_ctx_put(mdev_state->intx_evtfd);
        mdev_state->intx_evtfd = NULL;
        mdev_state->intx_mask = false;
        mdev_state->irq_index = -1;
}

/*
 * Drop the MSI eventfd, if one is installed, and reset irq_index.  Does
 * nothing when no MSI eventfd is set.
 */
static void mtty_disable_msi(struct mdev_state *mdev_state)
{
        if (!mdev_state->msi_evtfd)
                return;

        eventfd_ctx_put(mdev_state->msi_evtfd);
        mdev_state->msi_evtfd = NULL;
        mdev_state->irq_index = -1;
}

/*
 * Apply one VFIO_DEVICE_SET_IRQS request to the device, under ops_lock.
 *
 * @flags:  data-type and action bits of the request
 * @index:  VFIO_PCI_*_IRQ_INDEX being configured
 * @start:  first sub-index; only 0 is accepted
 * @count:  number of sub-indexes; 1, or 0 for a DATA_NONE trigger that
 *          tears the interrupt down
 * @data:   request payload: a u8 for DATA_BOOL, an int eventfd
 *          descriptor for DATA_EVENTFD
 *
 * INTx supports mask, unmask and trigger; MSI supports trigger only.
 * MSI-X, ERR and REQ indexes are answered with -ENOTTY.
 *
 * Returns 0 on success, -EINVAL for a request that does not match the
 * current interrupt mode or range, -ENOTTY for unsupported operations,
 * or the error from eventfd_ctx_fdget().  An index or action matched by
 * no case label falls through with ret still 0.
 *
 * NOTE(review): is_intx(), is_msi() and is_noirq() are defined outside
 * this view; presumably they test mdev_state->irq_index -- confirm.
 */
static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
                         unsigned int index, unsigned int start,
                         unsigned int count, void *data)
{
        int ret = 0;

        mutex_lock(&mdev_state->ops_lock);
        switch (index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                        if (!is_intx(mdev_state) || start != 0 || count != 1) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                                mdev_state->intx_mask = true;
                        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                                uint8_t mask = *(uint8_t *)data;

                                /* a zero payload leaves the mask as is */
                                if (mask)
                                        mdev_state->intx_mask = true;
                        } else if (flags &  VFIO_IRQ_SET_DATA_EVENTFD) {
                                ret = -ENOTTY; /* No support for mask fd */
                        }
                        break;
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        if (!is_intx(mdev_state) || start != 0 || count != 1) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                                mdev_state->intx_mask = false;
                        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                                uint8_t mask = *(uint8_t *)data;

                                /* a zero payload leaves the mask as is */
                                if (mask)
                                        mdev_state->intx_mask = false;
                        } else if (flags &  VFIO_IRQ_SET_DATA_EVENTFD) {
                                ret = -ENOTTY; /* No support for unmask fd */
                        }
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        /* DATA_NONE with count 0 tears INTx down */
                        if (is_intx(mdev_state) && !count &&
                            (flags & VFIO_IRQ_SET_DATA_NONE)) {
                                mtty_disable_intx(mdev_state);
                                break;
                        }

                        if (!(is_intx(mdev_state) || is_noirq(mdev_state)) ||
                            start != 0 || count != 1) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                                int fd = *(int *)data;
                                struct eventfd_ctx *evt;

                                /* drop any previous eventfd first */
                                mtty_disable_intx(mdev_state);

                                /* a negative fd only removes the eventfd */
                                if (fd < 0)
                                        break;

                                evt = eventfd_ctx_fdget(fd);
                                if (IS_ERR(evt)) {
                                        ret = PTR_ERR(evt);
                                        break;
                                }
                                mdev_state->intx_evtfd = evt;
                                mdev_state->irq_index = index;
                                break;
                        }

                        /* manual triggers require INTx to be set up */
                        if (!is_intx(mdev_state)) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                                mtty_trigger_interrupt(mdev_state);
                        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                                uint8_t trigger = *(uint8_t *)data;

                                if (trigger)
                                        mtty_trigger_interrupt(mdev_state);
                        }
                        break;
                }
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        ret = -ENOTTY;
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        /* DATA_NONE with count 0 tears MSI down */
                        if (is_msi(mdev_state) && !count &&
                            (flags & VFIO_IRQ_SET_DATA_NONE)) {
                                mtty_disable_msi(mdev_state);
                                break;
                        }

                        if (!(is_msi(mdev_state) || is_noirq(mdev_state)) ||
                            start != 0 || count != 1) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                                int fd = *(int *)data;
                                struct eventfd_ctx *evt;

                                /* drop any previous eventfd first */
                                mtty_disable_msi(mdev_state);

                                /* a negative fd only removes the eventfd */
                                if (fd < 0)
                                        break;

                                evt = eventfd_ctx_fdget(fd);
                                if (IS_ERR(evt)) {
                                        ret = PTR_ERR(evt);
                                        break;
                                }
                                mdev_state->msi_evtfd = evt;
                                mdev_state->irq_index = index;
                                break;
                        }

                        /* manual triggers require MSI to be set up */
                        if (!is_msi(mdev_state)) {
                                ret = -EINVAL;
                                break;
                        }

                        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                                mtty_trigger_interrupt(mdev_state);
                        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                                uint8_t trigger = *(uint8_t *)data;

                                if (trigger)
                                        mtty_trigger_interrupt(mdev_state);
                        }
                        break;
                }
                break;
        case VFIO_PCI_MSIX_IRQ_INDEX:
                dev_dbg(mdev_state->vdev.dev, "%s: MSIX_IRQ\n", __func__);
                ret = -ENOTTY;
                break;
        case VFIO_PCI_ERR_IRQ_INDEX:
                dev_dbg(mdev_state->vdev.dev, "%s: ERR_IRQ\n", __func__);
                ret = -ENOTTY;
                break;
        case VFIO_PCI_REQ_IRQ_INDEX:
                dev_dbg(mdev_state->vdev.dev, "%s: REQ_IRQ\n", __func__);
                ret = -ENOTTY;
                break;
        }

        mutex_unlock(&mdev_state->ops_lock);
        return ret;
}

/*
 * get_region_info_caps callback: describe the region selected by
 * region_info->index.
 *
 * The config region reports MTTY_CONFIG_SPACE_SIZE, BAR0 reports
 * MTTY_IO_BAR_SIZE, BAR1 reports MTTY_IO_BAR_SIZE only on two-port
 * devices, and every other region has size 0.  The size and offset are
 * also cached in mdev_state->region_info[].  @caps is not used.
 *
 * Returns 0, or -EINVAL when the index is not below
 * VFIO_PCI_NUM_REGIONS.
 */
static int mtty_ioctl_get_region_info(struct vfio_device *vdev,
                                      struct vfio_region_info *region_info,
                                      struct vfio_info_cap *caps)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);
        u32 bar_index = region_info->index;
        unsigned int size;

        if (bar_index >= VFIO_PCI_NUM_REGIONS)
                return -EINVAL;

        mutex_lock(&mdev_state->ops_lock);

        if (bar_index == VFIO_PCI_CONFIG_REGION_INDEX)
                size = MTTY_CONFIG_SPACE_SIZE;
        else if (bar_index == VFIO_PCI_BAR0_REGION_INDEX)
                size = MTTY_IO_BAR_SIZE;
        else if (bar_index == VFIO_PCI_BAR1_REGION_INDEX &&
                 mdev_state->nr_ports == 2)
                size = MTTY_IO_BAR_SIZE;
        else
                size = 0;

        /* Cache what is reported to user space. */
        mdev_state->region_info[bar_index].size = size;
        mdev_state->region_info[bar_index].vfio_offset =
                MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);

        region_info->flags = VFIO_REGION_INFO_FLAG_READ |
                VFIO_REGION_INFO_FLAG_WRITE;
        region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
        region_info->size = size;

        mutex_unlock(&mdev_state->ops_lock);

        return 0;
}

/*
 * Fill in flags and count for the interrupt selected by
 * irq_info->index.  Only INTx (eventfd, maskable, automasked) and MSI
 * (eventfd, noresize) are described, each with a count of 1.
 *
 * Returns 0, or -EINVAL for any other index (irq_info is then left
 * unmodified).
 */
static int mtty_get_irq_info(struct vfio_irq_info *irq_info)
{
        switch (irq_info->index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                irq_info->flags = VFIO_IRQ_INFO_EVENTFD |
                                  VFIO_IRQ_INFO_MASKABLE |
                                  VFIO_IRQ_INFO_AUTOMASKED;
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
                irq_info->flags = VFIO_IRQ_INFO_EVENTFD |
                                  VFIO_IRQ_INFO_NORESIZE;
                break;
        default:
                return -EINVAL;
        }

        irq_info->count = 1;

        return 0;
}

/*
 * Describe the device as a PCI device with VFIO_PCI_NUM_REGIONS regions
 * and VFIO_PCI_NUM_IRQS interrupt indexes.  Always returns 0.
 */
static int mtty_get_device_info(struct vfio_device_info *dev_info)
{
        dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
        dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
        dev_info->flags = VFIO_DEVICE_FLAGS_PCI;

        return 0;
}

/*
 * VFIO_DEVICE_GET_INFO: read the caller's header, fill in the device
 * description, cache it in mdev_state->dev_info and copy it back.
 */
static long mtty_ioctl_device_info(struct mdev_state *mdev_state,
                                   unsigned long arg)
{
        const unsigned long minsz =
                offsetofend(struct vfio_device_info, num_irqs);
        struct vfio_device_info info;
        int ret;

        if (copy_from_user(&info, (void __user *)arg, minsz))
                return -EFAULT;

        if (info.argsz < minsz)
                return -EINVAL;

        ret = mtty_get_device_info(&info);
        if (ret)
                return ret;

        memcpy(&mdev_state->dev_info, &info, sizeof(info));

        if (copy_to_user((void __user *)arg, &info, minsz))
                return -EFAULT;

        return 0;
}

/*
 * VFIO_DEVICE_GET_IRQ_INFO: the requested index is bounded by the
 * num_irqs value cached in mdev_state->dev_info by a previous
 * VFIO_DEVICE_GET_INFO.
 */
static long mtty_ioctl_irq_info(struct mdev_state *mdev_state,
                                unsigned long arg)
{
        const unsigned long minsz = offsetofend(struct vfio_irq_info, count);
        struct vfio_irq_info info;
        int ret;

        if (copy_from_user(&info, (void __user *)arg, minsz))
                return -EFAULT;

        if (info.argsz < minsz)
                return -EINVAL;
        if (info.index >= mdev_state->dev_info.num_irqs)
                return -EINVAL;

        ret = mtty_get_irq_info(&info);
        if (ret)
                return ret;

        if (copy_to_user((void __user *)arg, &info, minsz))
                return -EFAULT;

        return 0;
}

/*
 * VFIO_DEVICE_SET_IRQS: validate the header, pull in the optional
 * payload that follows it and hand both to mtty_set_irqs().
 */
static long mtty_ioctl_set_irqs(struct mdev_state *mdev_state,
                                unsigned long arg)
{
        const unsigned long minsz = offsetofend(struct vfio_irq_set, count);
        struct vfio_irq_set hdr;
        size_t data_size = 0;
        u8 *data = NULL;
        int ret;

        if (copy_from_user(&hdr, (void __user *)arg, minsz))
                return -EFAULT;

        ret = vfio_set_irqs_validate_and_prepare(&hdr,
                                        mdev_state->dev_info.num_irqs,
                                        VFIO_PCI_NUM_IRQS,
                                        &data_size);
        if (ret)
                return ret;

        if (data_size) {
                data = memdup_user((void __user *)(arg + minsz), data_size);
                if (IS_ERR(data))
                        return PTR_ERR(data);
        }

        ret = mtty_set_irqs(mdev_state, hdr.flags, hdr.index, hdr.start,
                            hdr.count, data);

        kfree(data);
        return ret;
}

/*
 * vfio_device_ops.ioctl callback.  Handles VFIO_DEVICE_GET_INFO,
 * VFIO_DEVICE_GET_IRQ_INFO, VFIO_DEVICE_SET_IRQS and VFIO_DEVICE_RESET;
 * every other command yields -ENOTTY.
 */
static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
                        unsigned long arg)
{
        struct mdev_state *mdev_state =
                container_of(vdev, struct mdev_state, vdev);

        switch (cmd) {
        case VFIO_DEVICE_GET_INFO:
                return mtty_ioctl_device_info(mdev_state, arg);
        case VFIO_DEVICE_GET_IRQ_INFO:
                return mtty_ioctl_irq_info(mdev_state, arg);
        case VFIO_DEVICE_SET_IRQS:
                return mtty_ioctl_set_irqs(mdev_state, arg);
        case VFIO_DEVICE_RESET:
                return mtty_reset(mdev_state);
        default:
                return -ENOTTY;
        }
}

/*
 * sysfs show callback of the "sample_mdev_dev" attribute: prints a
 * fixed message containing the device name.
 *
 * sysfs_emit() is used rather than sprintf() so the output is bounded
 * to the sysfs buffer handed in as @buf.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t
sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
                     char *buf)
{
        return sysfs_emit(buf, "This is MDEV %s\n", dev_name(dev));
}

/* Read-only device attribute backed by sample_mdev_dev_show(). */
static DEVICE_ATTR_RO(sample_mdev_dev);

/* NULL-terminated list of the attributes in the "vendor" group. */
static struct attribute *mdev_dev_attrs[] = {
        &dev_attr_sample_mdev_dev.attr,
        NULL,
};

/* Attribute group named "vendor" holding the attributes above. */
static const struct attribute_group mdev_dev_group = {
        .name  = "vendor",
        .attrs = mdev_dev_attrs,
};

/* NULL-terminated group list; used as mtty_driver.driver.dev_groups. */
static const struct attribute_group *mdev_dev_groups[] = {
        &mdev_dev_group,
        NULL,
};

/*
 * mdev driver get_available callback: number of further devices of
 * @mtype that fit into the ports still left in mdev_avail_ports.
 */
static unsigned int mtty_get_available(struct mdev_type *mtype)
{
        struct mtty_type *mtty = container_of(mtype, struct mtty_type, type);
        int ports_left = atomic_read(&mdev_avail_ports);

        return ports_left / mtty->nr_ports;
}

/*
 * vfio_device_ops.close_device callback: disable the migration files,
 * then release the INTx and MSI eventfds.
 */
static void mtty_close(struct vfio_device *vdev)
{
        struct mdev_state *ms = container_of(vdev, struct mdev_state, vdev);

        mtty_disable_files(ms);
        mtty_disable_intx(ms);
        mtty_disable_msi(ms);
}

/*
 * vfio_device callbacks for mtty devices.
 *
 * init/release, read/write, ioctl, region-info and close_device are handled
 * by the driver's own mtty_* functions; the iommufd bind/unbind and IOAS
 * attach/detach callbacks point at the vfio_iommufd_emulated_* helpers.
 */
static const struct vfio_device_ops mtty_dev_ops = {
        .name = "vfio-mtty",
        .init = mtty_init_dev,
        .release = mtty_release_dev,
        .read = mtty_read,
        .write = mtty_write,
        .ioctl = mtty_ioctl,
        .get_region_info_caps = mtty_ioctl_get_region_info,
        .bind_iommufd   = vfio_iommufd_emulated_bind,
        .unbind_iommufd = vfio_iommufd_emulated_unbind,
        .attach_ioas    = vfio_iommufd_emulated_attach_ioas,
        .detach_ioas    = vfio_iommufd_emulated_detach_ioas,
        .close_device   = mtty_close,
};

/*
 * mdev driver descriptor for mtty.
 *
 * Registered in mtty_dev_init() with mdev_register_driver() and also passed
 * to mdev_register_parent(); unregistered in mtty_dev_exit().
 */
static struct mdev_driver mtty_driver = {
        .device_api = VFIO_DEVICE_API_PCI_STRING,
        .driver = {
                .name = "mtty",
                .owner = THIS_MODULE,
                .mod_name = KBUILD_MODNAME,
                /* installs the "vendor" sysfs attribute group */
                .dev_groups = mdev_dev_groups,
        },
        .probe = mtty_probe,
        .remove = mtty_remove,
        .get_available = mtty_get_available,
};

/*
 * Release callback for mtty_dev.dev (assigned in mtty_dev_init()).
 *
 * The device is embedded in the static mtty_dev object, so nothing is freed
 * here; the callback only emits a debug message.
 */
static void mtty_device_release(struct device *dev)
{
        dev_dbg(dev, "mtty: released\n");
}

/*
 * Module init.
 *
 * Sets up, in order: the idr, a char device region plus cdev, the mdev
 * driver, the "mtty" class, the parent device, and finally the mdev parent
 * with its supported types. On any failure everything set up so far is torn
 * down in reverse order through the err_* labels.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init mtty_dev_init(void)
{
        int ret = 0;

        pr_info("mtty_dev: %s\n", __func__);

        memset(&mtty_dev, 0, sizeof(mtty_dev));

        idr_init(&mtty_dev.vd_idr);

        ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK + 1,
                                  MTTY_NAME);

        if (ret < 0) {
                pr_err("Error: failed to register mtty_dev, err:%d\n", ret);
                return ret;
        }

        cdev_init(&mtty_dev.vd_cdev, &vd_fops);
        ret = cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK + 1);
        if (ret) {
                pr_err("Error: failed to add mtty_dev cdev, err:%d\n", ret);
                /* the cdev was never added, so only the region is released */
                goto err_region;
        }

        pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt));

        ret = mdev_register_driver(&mtty_driver);
        if (ret)
                goto err_cdev;

        mtty_dev.vd_class = class_create(MTTY_CLASS_NAME);

        if (IS_ERR(mtty_dev.vd_class)) {
                pr_err("Error: failed to register mtty_dev class\n");
                ret = PTR_ERR(mtty_dev.vd_class);
                goto err_driver;
        }

        mtty_dev.dev.class = mtty_dev.vd_class;
        mtty_dev.dev.release = mtty_device_release;
        dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME);

        /* a failed device_register() still needs put_device(), see err_put */
        ret = device_register(&mtty_dev.dev);
        if (ret)
                goto err_put;

        ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev,
                                   &mtty_driver, mtty_mdev_types,
                                   ARRAY_SIZE(mtty_mdev_types));
        if (ret)
                goto err_device;
        return 0;

err_device:
        device_del(&mtty_dev.dev);
err_put:
        put_device(&mtty_dev.dev);
        class_destroy(mtty_dev.vd_class);
err_driver:
        mdev_unregister_driver(&mtty_driver);
err_cdev:
        cdev_del(&mtty_dev.vd_cdev);
err_region:
        unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
        return ret;
}

/*
 * Module exit: undoes everything mtty_dev_init() set up.
 *
 * The mdev parent is unregistered first, then the parent device, the idr,
 * the mdev driver, the cdev and its device-number region, and finally the
 * class.
 */
static void __exit mtty_dev_exit(void)
{
        /* NOTE(review): why bus is cleared here is not evident from this code */
        mtty_dev.dev.bus = NULL;
        mdev_unregister_parent(&mtty_dev.parent);

        device_unregister(&mtty_dev.dev);
        idr_destroy(&mtty_dev.vd_idr);
        mdev_unregister_driver(&mtty_driver);
        cdev_del(&mtty_dev.vd_cdev);
        unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
        /* the class goes last, after the device that referenced it */
        class_destroy(mtty_dev.vd_class);
        mtty_dev.vd_class = NULL;
        pr_info("mtty_dev: Unloaded!\n");
}

/* Module entry and exit points. */
module_init(mtty_dev_init)
module_exit(mtty_dev_exit)

/* Module metadata. */
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Test driver that simulate serial port over PCI");
MODULE_VERSION(VERSION_STRING);
MODULE_AUTHOR(DRIVER_AUTHOR);