/* root/drivers/block/aoe/aoeblk.c */
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoeblk.c
 * block device routines
 */

#include <linux/kernel.h>
#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <scsi/sg.h>
#include "aoe.h"

/* Held by aoeblk_open() around its locked re-check of the device flags. */
static DEFINE_MUTEX(aoeblk_mutex);
/*
 * Slab cache of struct buf objects, created in aoeblk_init(); each disk's
 * mempool in aoeblk_gdalloc() is backed by it.
 */
static struct kmem_cache *buf_pool_cache;
/* "aoe" debugfs directory created in aoeblk_init(); parent of the per-disk files. */
static struct dentry *aoe_debugfs_dir;

/*
 * random default picked from the historic block max_sectors cap
 *
 * The value is copied into queue_limits.max_hw_sectors each time
 * aoeblk_gdalloc() runs, so, as far as this file shows, a later write to
 * the parameter only affects disks allocated afterwards.
 */
static int aoe_maxsectors = 2560;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
        "When nonzero, set the maximum number of sectors per I/O request");

static ssize_t aoedisk_show_state(struct device *dev,
                                  struct device_attribute *attr, char *page)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct aoedev *d = disk->private_data;

        return sysfs_emit(page, "%s%s\n",
                        (d->flags & DEVFL_UP) ? "up" : "down",
                        (d->flags & DEVFL_KICKME) ? ",kickme" :
                        (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
        /* I'd rather see nopen exported so we can ditch closewait */
}
static ssize_t aoedisk_show_mac(struct device *dev,
                                struct device_attribute *attr, char *page)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct aoedev *d = disk->private_data;
        struct aoetgt *t = d->targets[0];

        if (t == NULL)
                return sysfs_emit(page, "none\n");
        return sysfs_emit(page, "%pm\n", t->addr);
}
static ssize_t aoedisk_show_netif(struct device *dev,
                                  struct device_attribute *attr, char *page)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct aoedev *d = disk->private_data;
        struct net_device *nds[8], **nd, **nnd, **ne;
        struct aoetgt **t, **te;
        struct aoeif *ifp, *e;
        char *p;

        memset(nds, 0, sizeof nds);
        nd = nds;
        ne = nd + ARRAY_SIZE(nds);
        t = d->targets;
        te = t + d->ntargets;
        for (; t < te && *t; t++) {
                ifp = (*t)->ifs;
                e = ifp + NAOEIFS;
                for (; ifp < e && ifp->nd; ifp++) {
                        for (nnd = nds; nnd < nd; nnd++)
                                if (*nnd == ifp->nd)
                                        break;
                        if (nnd == nd && nd != ne)
                                *nd++ = ifp->nd;
                }
        }

        ne = nd;
        nd = nds;
        if (*nd == NULL)
                return sysfs_emit(page, "none\n");
        for (p = page; nd < ne; nd++)
                p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
                        p == page ? "" : ",", (*nd)->name);
        p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
        return p-page;
}
/* firmware version */
static ssize_t aoedisk_show_fwver(struct device *dev,
                                  struct device_attribute *attr, char *page)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct aoedev *d = disk->private_data;

        return sysfs_emit(page, "0x%04x\n", (unsigned int) d->fw_ver);
}
static ssize_t aoedisk_show_payload(struct device *dev,
                                    struct device_attribute *attr, char *page)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct aoedev *d = disk->private_data;

        return sysfs_emit(page, "%lu\n", d->maxbcnt);
}

/*
 * aoe_debugfs_show - dump device and per-target state into a debugfs file
 * @s: seq_file whose ->private points at the struct aoedev to report on
 * @ignored: unused
 *
 * The device-wide values (rtt figures, skb pool length, kicked, maxbcnt,
 * ref) are printed without holding d->lock.  The target array is walked
 * with d->lock held and interrupts disabled.
 *
 * Always returns 0.
 */
static int aoe_debugfs_show(struct seq_file *s, void *ignored)
{
        struct aoedev *d;
        struct aoetgt **t, **te;
        struct aoeif *ifp, *ife;
        unsigned long flags;
        char c;

        d = s->private;
        seq_printf(s, "rttavg: %d rttdev: %d\n",
                d->rttavg >> RTTSCALE,
                d->rttdev >> RTTDSCALE);
        seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
        seq_printf(s, "kicked: %ld\n", d->kicked);
        seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
        seq_printf(s, "ref: %ld\n", d->ref);

        spin_lock_irqsave(&d->lock, flags);
        t = d->targets;
        te = t + d->ntargets;
        /* The walk ends at the first empty target slot. */
        for (; t < te && *t; t++) {
                /* First interface name is tab-indented, later ones comma-separated. */
                c = '\t';
                seq_printf(s, "falloc: %ld\n", (*t)->falloc);
                seq_printf(s, "ffree: %p\n",
                        list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
                seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
                        (*t)->maxout, (*t)->nframes);
                seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
                seq_printf(s, "\ttaint:%d\n", (*t)->taint);
                seq_printf(s, "\tr:%d\n", (*t)->rpkts);
                seq_printf(s, "\tw:%d\n", (*t)->wpkts);
                ifp = (*t)->ifs;
                ife = ifp + ARRAY_SIZE((*t)->ifs);
                /*
                 * Check the bound before dereferencing: when every slot of
                 * ifs[] is in use, ifp reaches ife and must not be read.
                 */
                for (; ifp < ife && ifp->nd; ifp++) {
                        seq_printf(s, "%c%s", c, ifp->nd->name);
                        c = ',';
                }
                seq_puts(s, "\n");
        }
        spin_unlock_irqrestore(&d->lock, flags);

        return 0;
}
DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);

/* Read-only (0444) sysfs attributes for each AoE disk; none has a store handler. */
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
/*
 * Declared by hand rather than through DEVICE_ATTR: presumably because the
 * user-visible name "firmware-version" contains a hyphen and so cannot be
 * part of a C identifier.
 */
static struct device_attribute dev_attr_firmware_version = {
        .attr = { .name = "firmware-version", .mode = 0444 },
        .show = aoedisk_show_fwver,
};
static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);

/* NULL-terminated list of the attributes defined above. */
static struct attribute *aoe_attrs[] = {
        &dev_attr_state.attr,
        &dev_attr_mac.attr,
        &dev_attr_netif.attr,
        &dev_attr_firmware_version.attr,
        &dev_attr_payload.attr,
        NULL,
};

static const struct attribute_group aoe_attr_group = {
        .attrs = aoe_attrs,
};

/* Passed to device_add_disk() in aoeblk_gdalloc(). */
static const struct attribute_group *aoe_attr_groups[] = {
        &aoe_attr_group,
        NULL,
};

/*
 * Create the per-disk debugfs file under the "aoe" directory and remember
 * its dentry in d->debugfs.  Does nothing when the directory pointer is
 * NULL.
 *
 * The file is named after the part of the disk name following the first
 * '/', or after the whole disk name when it contains no '/'.
 */
static void
aoedisk_add_debugfs(struct aoedev *d)
{
        const char *name;
        char *slash;

        if (!aoe_debugfs_dir)
                return;

        slash = strchr(d->gd->disk_name, '/');
        name = slash ? slash + 1 : d->gd->disk_name;

        /* An empty file name is treated as a fatal programming error. */
        BUG_ON(name[0] == '\0');

        d->debugfs = debugfs_create_file(name, 0444, aoe_debugfs_dir, d,
                                         &aoe_debugfs_fops);
}
/*
 * Remove the device's debugfs file, then clear the stored dentry pointer so
 * it is not left referring to the removed entry.
 */
void
aoedisk_rm_debugfs(struct aoedev *d)
{
        struct dentry *entry = d->debugfs;

        debugfs_remove(entry);
        d->debugfs = NULL;
}

/*
 * Block-device open handler.
 *
 * Returns 0 and bumps d->nopen when the device is up and not being torn
 * down; returns -ENODEV otherwise (including when the private pointer does
 * not look like a valid kernel address).
 *
 * The flags are first tested without locking as an early exit, then tested
 * again with aoeblk_mutex and d->lock held before nopen is modified.
 */
static int
aoeblk_open(struct gendisk *disk, blk_mode_t mode)
{
        struct aoedev *d = disk->private_data;
        ulong flags;
        int ret = -ENODEV;

        if (!virt_addr_valid(d)) {
                pr_crit("aoe: invalid device pointer in %s\n",
                        __func__);
                WARN_ON(1);
                return -ENODEV;
        }

        /* Unlocked early exit: device down or marked for teardown. */
        if ((d->flags & DEVFL_UP) == 0 || (d->flags & DEVFL_TKILL) != 0)
                return -ENODEV;

        mutex_lock(&aoeblk_mutex);
        spin_lock_irqsave(&d->lock, flags);
        if ((d->flags & DEVFL_UP) && !(d->flags & DEVFL_TKILL)) {
                d->nopen++;
                ret = 0;
        }
        spin_unlock_irqrestore(&d->lock, flags);
        mutex_unlock(&aoeblk_mutex);

        return ret;
}

/*
 * Block-device release handler: drops one open reference under d->lock.
 * When the count reaches zero, aoecmd_cfg() is called for this device's
 * major/minor after the lock has been released.
 */
static void
aoeblk_release(struct gendisk *disk)
{
        struct aoedev *d = disk->private_data;
        ulong flags;
        int last_close;

        spin_lock_irqsave(&d->lock, flags);
        last_close = (--d->nopen == 0);
        spin_unlock_irqrestore(&d->lock, flags);

        if (last_close)
                aoecmd_cfg(d->aoemajor, d->aoeminor);
}

static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
                                    const struct blk_mq_queue_data *bd)
{
        struct aoedev *d = hctx->queue->queuedata;

        spin_lock_irq(&d->lock);

        if ((d->flags & DEVFL_UP) == 0) {
                pr_info_ratelimited("aoe: device %ld.%d is not up\n",
                        d->aoemajor, d->aoeminor);
                spin_unlock_irq(&d->lock);
                blk_mq_start_request(bd->rq);
                return BLK_STS_IOERR;
        }

        list_add_tail(&bd->rq->queuelist, &d->rq_list);
        aoecmd_work(d);
        spin_unlock_irq(&d->lock);
        return BLK_STS_OK;
}

static int
aoeblk_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
        struct aoedev *d = disk->private_data;

        if ((d->flags & DEVFL_UP) == 0) {
                printk(KERN_ERR "aoe: disk not up\n");
                return -ENODEV;
        }

        geo->cylinders = d->geo.cylinders;
        geo->heads = d->geo.heads;
        geo->sectors = d->geo.sectors;
        return 0;
}

/*
 * ->ioctl handler.
 *
 * Returns -EINVAL for a zero @arg and -ENODEV when the device is not up.
 * HDIO_GET_IDENTITY copies d->ident to the user buffer at @arg and returns
 * 0, or -EFAULT if the copy fails.  Every other command returns -ENOTTY;
 * all of them except SG_IO are also logged.
 */
static int
aoeblk_ioctl(struct block_device *bdev, blk_mode_t mode, uint cmd, ulong arg)
{
        struct aoedev *d;

        if (arg == 0)
                return -EINVAL;

        d = bdev->bd_disk->private_data;
        if (!(d->flags & DEVFL_UP)) {
                pr_err("aoe: disk not up\n");
                return -ENODEV;
        }

        switch (cmd) {
        case HDIO_GET_IDENTITY:
                if (copy_to_user((void __user *) arg, &d->ident,
                                 sizeof(d->ident)))
                        return -EFAULT;
                return 0;
        case SG_IO:
                /* udev calls scsi_id, which uses SG_IO, resulting in noise */
                break;
        default:
                pr_info("aoe: unknown ioctl 0x%x\n", cmd);
                break;
        }

        return -ENOTTY;
}

/* Block-device entry points; installed as gd->fops in aoeblk_gdalloc(). */
static const struct block_device_operations aoe_bdops = {
        .open = aoeblk_open,
        .release = aoeblk_release,
        .ioctl = aoeblk_ioctl,
        .compat_ioctl = blkdev_compat_ptr_ioctl,
        .getgeo = aoeblk_getgeo,
        .owner = THIS_MODULE,
};

/* blk-mq operations; installed as the tag set's ops in aoeblk_gdalloc(). */
static const struct blk_mq_ops aoeblk_mq_ops = {
        .queue_rq       = aoeblk_queue_rq,
};

/*
 * blk_mq_alloc_disk and add_disk can sleep
 *
 * aoeblk_gdalloc - allocate and register the gendisk for an AoE device
 * @vp: the struct aoedev to set up, passed as an untyped pointer
 *
 * Allocates the buffer mempool, the blk-mq tag set and the gendisk, then
 * publishes them in the aoedev under d->lock, marks the device up and
 * registers the disk along with its sysfs attribute groups and debugfs
 * file.  The allocations and the registration are done with d->lock
 * released.
 *
 * Nothing is returned.  On failure the resources allocated so far are
 * released, DEVFL_GD_NOW is cleared and d->work is queued on aoe_wq.
 */
void
aoeblk_gdalloc(void *vp)
{
        struct aoedev *d = vp;
        struct gendisk *gd;
        mempool_t *mp;
        struct blk_mq_tag_set *set;
        sector_t ssize;
        struct queue_limits lim = {
                .max_hw_sectors         = aoe_maxsectors,
                .io_opt                 = SZ_2M,
                .features               = BLK_FEAT_ROTATIONAL,
        };
        ulong flags;
        int late = 0;
        int err;

        /*
         * Claim the allocation: proceed only when an allocation has been
         * requested (GDALLOC), the device is not being torn down (TKILL)
         * and no other allocation is in progress (GD_NOW).
         */
        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_GDALLOC
        && !(d->flags & DEVFL_TKILL)
        && !(d->flags & DEVFL_GD_NOW))
                d->flags |= DEVFL_GD_NOW;
        else
                late = 1;
        spin_unlock_irqrestore(&d->lock, flags);
        if (late)
                return;

        mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
                buf_pool_cache);
        if (mp == NULL) {
                printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
                        d->aoemajor, d->aoeminor);
                goto err;
        }

        /* The tag set is embedded in the aoedev: one hardware queue, depth 128. */
        set = &d->tag_set;
        set->ops = &aoeblk_mq_ops;
        set->cmd_size = sizeof(struct aoe_req);
        set->nr_hw_queues = 1;
        set->queue_depth = 128;
        set->numa_node = NUMA_NO_NODE;
        err = blk_mq_alloc_tag_set(set);
        if (err) {
                pr_err("aoe: cannot allocate tag set for %ld.%d\n",
                        d->aoemajor, d->aoeminor);
                goto err_mempool;
        }

        gd = blk_mq_alloc_disk(set, &lim, d);
        if (IS_ERR(gd)) {
                pr_err("aoe: cannot allocate block queue for %ld.%d\n",
                        d->aoemajor, d->aoeminor);
                goto err_tagset;
        }

        /*
         * Publish the new objects in the aoedev.  The WARN_ONs check that
         * the flags claimed above are unchanged and that the device has no
         * disk yet and is not already up.
         */
        spin_lock_irqsave(&d->lock, flags);
        WARN_ON(!(d->flags & DEVFL_GD_NOW));
        WARN_ON(!(d->flags & DEVFL_GDALLOC));
        WARN_ON(d->flags & DEVFL_TKILL);
        WARN_ON(d->gd);
        WARN_ON(d->flags & DEVFL_UP);
        d->bufpool = mp;
        d->blkq = gd->queue;
        d->gd = gd;
        gd->major = AOE_MAJOR;
        gd->first_minor = d->sysminor;
        gd->minors = AOE_PARTITIONS;
        gd->fops = &aoe_bdops;
        gd->private_data = d;
        /* Snapshot the size under the lock; it is applied after unlocking. */
        ssize = d->ssize;
        snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
                d->aoemajor, d->aoeminor);

        /* The request has been served; the device is marked up before add. */
        d->flags &= ~DEVFL_GDALLOC;
        d->flags |= DEVFL_UP;

        spin_unlock_irqrestore(&d->lock, flags);

        set_capacity(gd, ssize);

        err = device_add_disk(NULL, gd, aoe_attr_groups);
        if (err)
                goto out_disk_cleanup;
        aoedisk_add_debugfs(d);

        spin_lock_irqsave(&d->lock, flags);
        WARN_ON(!(d->flags & DEVFL_GD_NOW));
        d->flags &= ~DEVFL_GD_NOW;
        spin_unlock_irqrestore(&d->lock, flags);
        return;

        /*
         * NOTE(review): when device_add_disk() fails, d->gd, d->blkq and
         * d->bufpool still point at the objects released below, and
         * DEVFL_UP remains set.  Confirm that the work queued at the end of
         * this path copes with that state.
         */
out_disk_cleanup:
        put_disk(gd);
err_tagset:
        blk_mq_free_tag_set(set);
err_mempool:
        mempool_destroy(mp);
err:
        /* Release the claim and hand the device back to its work item. */
        spin_lock_irqsave(&d->lock, flags);
        d->flags &= ~DEVFL_GD_NOW;
        queue_work(aoe_wq, &d->work);
        spin_unlock_irqrestore(&d->lock, flags);
}

/*
 * Undo aoeblk_init(): remove the "aoe" debugfs directory and everything
 * below it, forget the directory pointer, then destroy the buf slab cache.
 */
void
aoeblk_exit(void)
{
        struct dentry *dir = aoe_debugfs_dir;

        debugfs_remove_recursive(dir);
        aoe_debugfs_dir = NULL;

        kmem_cache_destroy(buf_pool_cache);
}

/*
 * Set up the file-scope resources used by the block layer glue: the slab
 * cache for struct buf and the "aoe" debugfs directory.
 *
 * Returns 0 on success or -ENOMEM when the slab cache cannot be created.
 * The result of the debugfs directory creation is stored but not checked
 * here.
 */
int __init
aoeblk_init(void)
{
        buf_pool_cache = kmem_cache_create("aoe_bufs", sizeof(struct buf),
                                           0, 0, NULL);
        if (!buf_pool_cache)
                return -ENOMEM;

        aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);

        return 0;
}