root/drivers/uio/uio_hv_generic.c
// SPDX-License-Identifier: GPL-2.0
/*
 * uio_hv_generic - generic UIO driver for VMBus
 *
 * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
 * Copyright (c) 2016, Microsoft Corporation.
 *
 * Since the driver does not declare any device ids, you must allocate
 * an id and bind the device to the driver yourself.  For example:
 *
 * Associate Network GUID with UIO device
 * # echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" \
 *    > /sys/bus/vmbus/drivers/uio_hv_generic/new_id
 * Then rebind
 * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
 *    > /sys/bus/vmbus/drivers/hv_netvsc/unbind
 * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
 *    > /sys/bus/vmbus/drivers/uio_hv_generic/bind
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/uio_driver.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>
#include <linux/hyperv.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "../hv/hyperv_vmbus.h"

/* Version string; used for MODULE_VERSION and for uio_info.version in probe */
#define DRIVER_VERSION  "0.02.1"
/* Author and description strings; used for the module metadata macros */
#define DRIVER_AUTHOR   "Stephen Hemminger <sthemmin at microsoft.com>"
#define DRIVER_DESC     "Generic UIO driver for VMBus devices"

/*
 * Sizes in bytes of the send and receive buffers that probe allocates
 * and registers as GPADLs for HV_NIC channels.
 */
#define SEND_BUFFER_SIZE (16 * 1024 * 1024)
#define RECV_BUFFER_SIZE (31 * 1024 * 1024)

/*
 * List of resources to be mapped to user space
 * can be extended up to MAX_UIO_MAPS(5) items
 */
enum hv_uio_map {
        TXRX_RING_MAP = 0,
        INT_PAGE_MAP,
        MON_PAGE_MAP,
        RECV_BUF_MAP,
        SEND_BUF_MAP
};

/*
 * Per-device driver state. Allocated in hv_uio_probe() and stored as the
 * hv_device driver data; also reachable from the UIO callbacks through
 * info.priv or container_of() on the embedded uio_info.
 */
struct hv_uio_private_data {
        /* UIO description registered with uio_register_device() */
        struct uio_info info;
        /* VMBus device this state belongs to */
        struct hv_device *device;
        /*
         * Open count: the ring is connected when it goes 0 -> 1 and
         * disconnected when it drops back to 0.
         */
        atomic_t refcnt;

        /* Receive buffer and its GPADL (HV_NIC only); name holds "recv:<handle>" */
        void    *recv_buf;
        struct vmbus_gpadl recv_gpadl;
        char    recv_name[32];  /* e.g. "recv:4294967295" */

        /* Send buffer and its GPADL (HV_NIC only); name holds "send:<handle>" */
        void    *send_buf;
        struct vmbus_gpadl send_gpadl;
        char    send_name[32];
};

/*
 * Apply the requested interrupt state to one channel.
 *
 * The inbound ring's interrupt_mask is set to the logical negation of
 * irq_state. When interrupts are being enabled and the channel offer has
 * no monitor allocated, the host is additionally signalled through
 * vmbus_set_event().
 */
static void set_event(struct vmbus_channel *channel, s32 irq_state)
{
        channel->inbound.ring_buffer->interrupt_mask = !irq_state;

        /* Nothing to signal when masking, or when a monitor is allocated */
        if (channel->offermsg.monitor_allocated || !irq_state)
                return;

        /* MB is needed for host to see the interrupt mask first */
        virt_mb();
        vmbus_set_event(channel);
}

/*
 * irqcontrol callback registered in uio_info.
 * Lets a user space process enable or disable interrupts. The request is
 * applied to the primary channel and then, under the VMBus channel mutex,
 * to every sub-channel on the primary channel's sc_list.
 *
 * @param info
 *  pointer to uio_info.
 * @param irq_state
 *  state value. 1 to enable interrupt, 0 to disable interrupt.
 *
 * Always returns 0.
 */
static int
hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
{
        struct hv_uio_private_data *priv = info->priv;
        struct vmbus_channel *prim = priv->device->channel;
        struct vmbus_channel *sub;

        set_event(prim, irq_state);

        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(sub, &prim->sc_list, sc_list) {
                set_event(sub, irq_state);
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        return 0;
}

/*
 * Callback from vmbus_event when something is in inbound ring.
 *
 * @param context
 *  the vmbus_channel that was passed when the callback was installed.
 *
 * Forwards the event to the UIO device owning the channel.
 */
static void hv_uio_channel_cb(void *context)
{
        struct vmbus_channel *chan = context;
        struct vmbus_channel *owner;
        struct hv_uio_private_data *pdata;

        virt_mb();

        /*
         * A sub-channel has no device of its own to look at here; the
         * hv device is taken from its primary channel instead.
         */
        owner = chan->primary_channel;
        if (!owner)
                owner = chan;

        pdata = hv_get_drvdata(owner->device_obj);
        uio_event_notify(&pdata->info);
}

/*
 * Callback from vmbus_event when channel is rescinded.
 * It is meant for rescind of primary channels only.
 */
static void hv_uio_rescind(struct vmbus_channel *channel)
{
        struct hv_device *hv_dev = channel->device_obj;
        struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);

        /*
         * Turn off the interrupt file handle
         * Next read for event will return -EIO
         */
        pdata->info.irq = 0;

        /* Wake up reader */
        uio_event_notify(&pdata->info);

        /*
         * With rescind callback registered, rescind path will not unregister the device
         * from vmbus when the primary channel is rescinded.
         * Without it, rescind handling is incomplete and next onoffer msg does not come.
         * Unregister the device from vmbus here.
         */
        vmbus_device_unregister(channel->device_obj);
}

/*
 * mmap handler for the ring buffer sysfs interface.
 * The ring buffer is allocated as contiguous memory by vmbus_open.
 *
 * Returns -ENODEV unless the channel is in CHANNEL_OPENED_STATE,
 * otherwise the result of vm_iomap_memory() over the whole ring.
 */
static int
hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma)
{
        void *ring_va = page_address(channel->ringbuffer_page);
        unsigned long ring_len;

        if (channel->state != CHANNEL_OPENED_STATE)
                return -ENODEV;

        /* ring length in bytes: page count scaled by the page size */
        ring_len = channel->ringbuffer_pagecount << PAGE_SHIFT;

        return vm_iomap_memory(vma, virt_to_phys(ring_va), ring_len);
}

/*
 * Callback from VMBUS subsystem when new channel created.
 *
 * Opens the sub-channel with 2 MiB rings in each direction, switches it
 * to HV_CALL_ISR read mode and creates its ring sysfs file. Failures are
 * logged against the primary channel's device; if the sysfs file cannot
 * be created the sub-channel is closed again.
 */
static void
hv_uio_new_channel(struct vmbus_channel *new_sc)
{
        struct device *device = &new_sc->primary_channel->device_obj->device;
        const size_t sc_ring_size = SZ_2M;
        int err;

        /* Create host communication ring */
        err = vmbus_open(new_sc, sc_ring_size, sc_ring_size, NULL, 0,
                         hv_uio_channel_cb, new_sc);
        if (err) {
                dev_err(device, "vmbus_open subchannel failed: %d\n", err);
                return;
        }

        set_channel_read_mode(new_sc, HV_CALL_ISR);

        err = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap);
        if (!err)
                return;

        dev_err(device, "sysfs create ring bin file failed; %d\n", err);
        vmbus_close(new_sc);
}

/* free the reserved buffers for send and receive */
static void
hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
{
        if (pdata->send_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
                if (!pdata->send_gpadl.decrypted)
                        vfree(pdata->send_buf);
        }

        if (pdata->recv_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
                if (!pdata->recv_gpadl.decrypted)
                        vfree(pdata->recv_buf);
        }
}

/*
 * UIO open callback: VMBus primary channel is opened on first use.
 *
 * Every call takes a reference on pdata->refcnt. Only the call that
 * raises the count to 1 installs the rescind and sub-channel callbacks
 * and connects the ring; the reference is dropped again if that fails.
 *
 * Returns 0 on success or the error from vmbus_connect_ring().
 */
static int
hv_uio_open(struct uio_info *info, struct inode *inode)
{
        struct hv_uio_private_data *pdata
                = container_of(info, struct hv_uio_private_data, info);
        struct vmbus_channel *chan = pdata->device->channel;
        int err;

        /* Later openers share the already connected ring */
        if (atomic_inc_return(&pdata->refcnt) != 1)
                return 0;

        vmbus_set_chn_rescind_callback(chan, hv_uio_rescind);
        vmbus_set_sc_create_callback(chan, hv_uio_new_channel);

        err = vmbus_connect_ring(chan, hv_uio_channel_cb, chan);
        if (!err)
                return 0;

        /* Connecting failed: give back the reference taken above */
        atomic_dec(&pdata->refcnt);
        return err;
}

/*
 * UIO release callback: VMBus primary channel is closed on last close.
 *
 * Drops one reference on pdata->refcnt. Returns 0 while other references
 * remain, otherwise the result of vmbus_disconnect_ring().
 */
static int
hv_uio_release(struct uio_info *info, struct inode *inode)
{
        struct hv_uio_private_data *pdata
                = container_of(info, struct hv_uio_private_data, info);
        struct hv_device *dev = pdata->device;

        if (!atomic_dec_and_test(&pdata->refcnt))
                return 0;

        return vmbus_disconnect_ring(dev->channel);
}

/*
 * Bind a VMBus device to the UIO framework.
 *
 * Allocates the primary channel ring, describes the regions user space may
 * map (ring, interrupt page, monitor page and, for HV_NIC channels, the
 * receive and send buffers) and registers the UIO device.
 *
 * @param dev
 *  VMBus device being probed.
 * @param dev_id
 *  matching device id entry (not used here).
 *
 * Returns 0 on success or a negative errno. On failure the ring, the
 * buffers and the GPADLs set up here are released again; the private data
 * itself is device-managed (devm_kzalloc).
 */
static int
hv_uio_probe(struct hv_device *dev,
             const struct hv_vmbus_device_id *dev_id)
{
        struct vmbus_channel *channel = dev->channel;
        struct hv_uio_private_data *pdata;
        void *ring_buffer;
        int ret;
        size_t ring_size = hv_dev_ring_size(channel);

        /* Fall back to 2 MiB when no ring size is reported for the device */
        if (!ring_size)
                ring_size = SZ_2M;

        /* Adjust ring size if necessary to have it page aligned */
        ring_size = VMBUS_RING_SIZE(ring_size);

        pdata = devm_kzalloc(&dev->device, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;

        ret = vmbus_alloc_ring(channel, ring_size, ring_size);
        if (ret)
                return ret;

        set_channel_read_mode(channel, HV_CALL_ISR);

        /* Fill general uio info */
        pdata->info.name = "uio_hv_generic";
        pdata->info.version = DRIVER_VERSION;
        pdata->info.irqcontrol = hv_uio_irqcontrol;
        pdata->info.open = hv_uio_open;
        pdata->info.release = hv_uio_release;
        pdata->info.irq = UIO_IRQ_CUSTOM;
        atomic_set(&pdata->refcnt, 0);

        /* mem resources */
        pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
        ring_buffer = page_address(channel->ringbuffer_page);
        pdata->info.mem[TXRX_RING_MAP].addr
                = (uintptr_t)virt_to_phys(ring_buffer);
        pdata->info.mem[TXRX_RING_MAP].size
                = channel->ringbuffer_pagecount << PAGE_SHIFT;
        pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;

        pdata->info.mem[INT_PAGE_MAP].name = "int_page";
        pdata->info.mem[INT_PAGE_MAP].addr
                = (uintptr_t)vmbus_connection.int_page;
        pdata->info.mem[INT_PAGE_MAP].size = HV_HYP_PAGE_SIZE;
        pdata->info.mem[INT_PAGE_MAP].memtype = UIO_MEM_LOGICAL;

        pdata->info.mem[MON_PAGE_MAP].name = "monitor_page";
        pdata->info.mem[MON_PAGE_MAP].addr
                = (uintptr_t)vmbus_connection.monitor_pages[1];
        pdata->info.mem[MON_PAGE_MAP].size = HV_HYP_PAGE_SIZE;
        pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL;

        /* Only network channels get dedicated receive/send buffers */
        if (channel->device_id == HV_NIC) {
                pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
                if (!pdata->recv_buf) {
                        ret = -ENOMEM;
                        goto fail_free_ring;
                }

                ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
                                            RECV_BUFFER_SIZE, &pdata->recv_gpadl);
                if (ret) {
                        /*
                         * No handle was recorded, so hv_uio_cleanup() will
                         * skip this buffer; free it here unless it is still
                         * marked decrypted.
                         */
                        if (!pdata->recv_gpadl.decrypted)
                                vfree(pdata->recv_buf);
                        goto fail_close;
                }

                /* put Global Physical Address Label in name */
                snprintf(pdata->recv_name, sizeof(pdata->recv_name),
                         "recv:%u", pdata->recv_gpadl.gpadl_handle);
                pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name;
                pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf;
                pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE;
                pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL;

                pdata->send_buf = vzalloc(SEND_BUFFER_SIZE);
                if (!pdata->send_buf) {
                        ret = -ENOMEM;
                        goto fail_close;
                }

                ret = vmbus_establish_gpadl(channel, pdata->send_buf,
                                            SEND_BUFFER_SIZE, &pdata->send_gpadl);
                if (ret) {
                        /* Same reasoning as for the receive buffer above */
                        if (!pdata->send_gpadl.decrypted)
                                vfree(pdata->send_buf);
                        goto fail_close;
                }

                snprintf(pdata->send_name, sizeof(pdata->send_name),
                         "send:%u", pdata->send_gpadl.gpadl_handle);
                pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name;
                pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf;
                pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE;
                pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
        }

        pdata->info.priv = pdata;
        pdata->device = dev;

        /*
         * Publish the private data before the UIO device is registered.
         * Once registered, user space can open the device, which connects
         * the ring and installs hv_uio_channel_cb()/hv_uio_rescind(). Both
         * look the private data up through hv_get_drvdata(), so it must
         * already be in place when the first event can arrive.
         */
        hv_set_drvdata(dev, pdata);

        ret = uio_register_device(&dev->device, &pdata->info);
        if (ret) {
                dev_err(&dev->device, "hv_uio register failed\n");
                goto fail_clear_drvdata;
        }

        /*
         * This internally calls sysfs_update_group, which returns a non-zero value if it executes
         * before sysfs_create_group. This is expected as the 'ring' will be created later in
         * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return
         * value and print warning.
         *
         * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race
         * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed
         * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify
         * APIs to make sure that ring is created.
         */
        hv_create_ring_sysfs(channel, hv_uio_ring_mmap);

        return 0;

fail_clear_drvdata:
        /* Do not leave a pointer to soon-to-be-released private data behind */
        hv_set_drvdata(dev, NULL);
fail_close:
        hv_uio_cleanup(dev, pdata);
fail_free_ring:
        vmbus_free_ring(dev->channel);

        return ret;
}

static void
hv_uio_remove(struct hv_device *dev)
{
        struct hv_uio_private_data *pdata = hv_get_drvdata(dev);

        if (!pdata)
                return;

        hv_remove_ring_sysfs(dev->channel);
        uio_unregister_device(&pdata->info);
        hv_uio_cleanup(dev, pdata);

        vmbus_free_ring(dev->channel);
}

/*
 * VMBus driver description. No static id table is provided; device ids
 * have to be added at run time (see the new_id example in the file header).
 */
static struct hv_driver hv_uio_drv = {
        .name           = "uio_hv_generic",
        .probe          = hv_uio_probe,
        .remove         = hv_uio_remove,
        .id_table       = NULL, /* only dynamic id's */
};

/* Module entry point: register the driver with the VMBus core. */
static int __init
hyperv_module_init(void)
{
        int rc = vmbus_driver_register(&hv_uio_drv);

        return rc;
}

/* Module exit point: unregister the driver from the VMBus core. */
static void __exit
hyperv_module_exit(void)
{
        struct hv_driver *drv = &hv_uio_drv;

        vmbus_driver_unregister(drv);
}

/* Hook the driver registration/unregistration into module load/unload */
module_init(hyperv_module_init);
module_exit(hyperv_module_exit);

/* Module metadata; version, author and description come from the DRIVER_* macros */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);