root/tools/testing/selftests/ublk/stripe.c
// SPDX-License-Identifier: GPL-2.0

#include "kublk.h"

/*
 * Upper bound on the number of stripes a single request is split into.
 * It sizes stripe_array::s[] and the per-request SQE arrays, and target
 * init rejects devices with more backing files than this.
 */
#define NR_STRIPE  MAX_BACK_FILES

/* Per-device stripe configuration; target init stores it in dev->private_data. */
struct stripe_conf {
        /* number of backing files the data is striped across */
        unsigned nr_files;
        /* log2 of the chunk size in bytes */
        unsigned shift;
};

/* The part of one request that lands on a single backing file. */
struct stripe {
        /* first sector of this I/O within the backing file */
        loff_t          start;
        /* total sectors accumulated for this backing file; 0 means unused */
        unsigned        nr_sects;
        /* chunk position within the stripe unit; seq + 1 is used as the file index */
        int             seq;

        /* iovec segments pointing into the request buffer (slice of stripe_array::_vec) */
        struct iovec    *vec;
        /* number of entries of vec[] currently filled */
        unsigned        nr_vec;
        /* number of entries of vec[] reserved for this stripe */
        unsigned        cap;
};

/* Per-request striping state, allocated per READ/WRITE and kept in io->private_data. */
struct stripe_array {
        /* one slot per backing file; only the first 'nr' touched slots are submitted */
        struct stripe   s[NR_STRIPE];
        /* number of slots in s[] that carry data for this request */
        unsigned        nr;
        /* trailing iovec storage, carved into equal per-stripe slices */
        struct iovec    _vec[];
};

/* Look up the stripe configuration hanging off the device that owns @q. */
static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
{
        const struct stripe_conf *conf = (struct stripe_conf *)q->dev->private_data;

        return conf;
}

/*
 * Number of iovec entries to reserve per backing file for @iod.
 *
 * A stripe unit is one chunk on every backing file, so each file receives
 * at most one segment per unit. The result is the count of units spanned
 * between the request's first sector and its end sector
 * (start_sector + nr_sectors), both ends included.
 */
static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
                const struct ublksrv_io_desc *iod)
{
        /* sectors in one full stripe unit: chunk sectors times file count */
        const unsigned unit_sects = conf->nr_files << (conf->shift - 9);
        loff_t first = iod->start_sector;
        loff_t last = first + iod->nr_sectors;
        loff_t first_unit = first / unit_sects;
        loff_t last_unit = last / unit_sects;

        return last_unit - first_unit + 1;
}

/*
 * Allocate and initialise the per-request stripe state for @iod.
 *
 * One allocation holds the stripe_array header followed by
 * nr_vecs * conf->nr_files iovec entries; each stripe slot gets its own
 * nr_vecs-sized slice of that trailing storage. All slots start empty
 * (nr_sects == 0, nr_vec == 0).
 *
 * Returns the new array, or NULL if the allocation fails. The caller owns
 * the result and releases it with free_stripe_array().
 */
static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
                const struct ublksrv_io_desc *iod)
{
        unsigned nr_vecs = calculate_nr_vec(conf, iod);
        unsigned total = nr_vecs * conf->nr_files;
        struct stripe_array *s;
        unsigned i;

        s = malloc(sizeof(*s) + total * sizeof(struct iovec));
        if (!s)
                return NULL;

        s->nr = 0;
        for (i = 0; i < conf->nr_files; i++) {
                struct stripe *t = &s->s[i];

                t->nr_vec = 0;
                /* slice i of the shared iovec storage */
                t->vec = &s->_vec[i * nr_vecs];
                t->nr_sects = 0;
                t->cap = nr_vecs;
        }

        return s;
}

/*
 * Release a stripe array obtained from alloc_stripe_array(). The header and
 * the iovec storage share one allocation, so a single free() covers both.
 */
static void free_stripe_array(struct stripe_array *s)
{
        free(s);
}

/*
 * Split the sector range of @iod into per-backing-file stripes.
 *
 * The request is walked chunk by chunk. Each piece is appended to the next
 * slot of @s in round-robin order: the slot's sector count grows and one
 * iovec pointing at the matching part of @base is added. On first use a
 * slot records its backing-file offset and its chunk position within the
 * stripe unit (seq). Later pieces for the same slot must continue exactly
 * where the previous one ended, which is asserted.
 *
 * @s must come from alloc_stripe_array() for the same @conf and @iod.
 */
static void calculate_stripe_array(const struct stripe_conf *conf,
                const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
        const unsigned shift = conf->shift - 9;
        const unsigned chunk_sects = 1 << shift;
        const unsigned unit_sects = conf->nr_files << shift;
        off64_t pos = iod->start_sector;
        off64_t end = pos + iod->nr_sectors;
        unsigned long bytes_done = 0;
        unsigned slot = 0;

        while (pos < end) {
                /* sector offset of pos inside its chunk */
                loff_t in_chunk = pos & (chunk_sects - 1);
                /* sectors left until the end of the current chunk */
                unsigned nr_sects = chunk_sects - in_chunk;
                /* first sector of the stripe unit containing pos */
                loff_t unit_off = (pos / unit_sects) * unit_sects;
                unsigned seq = (pos - unit_off) >> shift;
                /* matching sector inside the backing file */
                loff_t stripe_off = (unit_off / conf->nr_files) + in_chunk;
                struct stripe *cur = &s->s[slot];
                struct iovec *seg;

                if (nr_sects > end - pos)
                        nr_sects = end - pos;

                if (cur->nr_sects != 0) {
                        /* slot already in use: the new piece must extend it */
                        ublk_assert(seq == cur->seq);
                        ublk_assert(cur->start + cur->nr_sects == stripe_off);
                        cur->nr_sects += nr_sects;
                } else {
                        cur->nr_sects = nr_sects;
                        cur->start = stripe_off;
                        cur->seq = seq;
                        s->nr += 1;
                }

                ublk_assert(cur->nr_vec < cur->cap);
                seg = &cur->vec[cur->nr_vec++];
                seg->iov_base = (void *)(base + bytes_done);
                seg->iov_len = nr_sects << 9;

                pos += nr_sects;
                bytes_done += nr_sects << 9;
                slot = (slot + 1) % conf->nr_files;
        }
}

static inline enum io_uring_op stripe_to_uring_op(
                const struct ublksrv_io_desc *iod, int zc)
{
        unsigned ublk_op = ublksrv_get_op(iod);

        if (ublk_op == UBLK_IO_OP_READ)
                return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
        else if (ublk_op == UBLK_IO_OP_WRITE)
                return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
        ublk_assert(0);
}

/*
 * Queue the backing-file I/O for one READ/WRITE request.
 *
 * The request is split into per-file stripes, and one vectored read/write
 * SQE is prepared per used stripe against fixed-file slot seq + 1. In
 * zero-copy mode the data SQEs sit between a buffer-register and a
 * buffer-unregister command, and the register and data SQEs are marked
 * IOSQE_IO_HARDLINK.
 *
 * The stripe state is stored in io->private_data and freed by the
 * completion handler once all target I/O for the tag has finished.
 *
 * Returns the number of completions to wait for: one per data SQE plus,
 * in zero-copy mode, the unregister command (the register command is
 * IOSQE_CQE_SKIP_SUCCESS). Returns -ENOMEM if the stripe state cannot be
 * allocated.
 */
static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
                                  const struct ublksrv_io_desc *iod, int tag)
{
        const struct stripe_conf *conf = get_chunk_shift(q);
        unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
        unsigned zc = (ublk_queue_use_zc(q) != 0);
        enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
        /*
         * Up to NR_STRIPE data SQEs, plus the buffer register/unregister
         * pair in zero-copy mode.
         */
        struct io_uring_sqe *sqe[NR_STRIPE + 2];
        struct stripe_array *s = alloc_stripe_array(conf, iod);
        struct ublk_io *io = ublk_get_io(q, tag);
        int i, extra = zc ? 2 : 0;
        void *base = io->buf_addr;
        unsigned short buf_idx = ublk_io_buf_idx(t, q, tag);

        if (!s)
                return -ENOMEM;

        io->private_data = s;
        calculate_stripe_array(conf, iod, s, base);

        /* NOTE(review): the result of ublk_io_alloc_sqes() is not checked here */
        ublk_io_alloc_sqes(t, sqe, s->nr + extra);

        if (zc) {
                io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_idx);
                sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
                sqe[0]->user_data = build_user_data(tag,
                        ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
        }

        /* data SQEs occupy sqe[zc] .. sqe[zc + s->nr - 1] */
        for (i = zc; i < s->nr + extra - zc; i++) {
                struct stripe *stripe = &s->s[i - zc];

                io_uring_prep_rw(op, sqe[i],
                                stripe->seq + 1,
                                (void *)stripe->vec,
                                stripe->nr_vec,
                                stripe->start << 9);
                io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
                if (auto_zc || zc) {
                        sqe[i]->buf_index = buf_idx;
                        if (zc)
                                sqe[i]->flags |= IOSQE_IO_HARDLINK;
                }
                /* bit63 marks us as tgt io */
                sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1);
        }
        if (zc) {
                struct io_uring_sqe *unreg = sqe[s->nr + 1];

                io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, buf_idx);
                unreg->user_data = build_user_data(
                        tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
        }

        /* register buffer is skip_success */
        return s->nr + zc;
}

static int handle_flush(struct ublk_thread *t, struct ublk_queue *q,
                        const struct ublksrv_io_desc *iod, int tag)
{
        const struct stripe_conf *conf = get_chunk_shift(q);
        struct io_uring_sqe *sqe[NR_STRIPE];
        int i;

        ublk_io_alloc_sqes(t, sqe, conf->nr_files);
        for (i = 0; i < conf->nr_files; i++) {
                io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
                io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
                sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1);
        }
        return conf->nr_files;
}

/*
 * Dispatch the request identified by @tag according to its ublk operation.
 *
 * FLUSH and READ/WRITE are queued to the backing files. WRITE_ZEROES and
 * DISCARD are rejected with -ENOTSUP, and any other operation with -EINVAL.
 *
 * Returns the value of the handler that ran (number of queued target I/Os)
 * or a negative errno.
 */
static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
                               int tag)
{
        const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
        unsigned ublk_op = ublksrv_get_op(iod);
        int ret;

        if (ublk_op == UBLK_IO_OP_FLUSH)
                ret = handle_flush(t, q, iod, tag);
        else if (ublk_op == UBLK_IO_OP_READ || ublk_op == UBLK_IO_OP_WRITE)
                ret = stripe_queue_tgt_rw_io(t, q, iod, tag);
        else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES ||
                 ublk_op == UBLK_IO_OP_DISCARD)
                ret = -ENOTSUP;
        else
                ret = -EINVAL;

        ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
                        iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
        return ret;
}

/*
 * queue_io entry point of the stripe target. Queues the target I/O for
 * @tag and passes the outcome (count or negative errno) to
 * ublk_queued_tgt_io(). Always returns 0.
 */
static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q,
                                int tag)
{
        ublk_queued_tgt_io(t, q, tag, stripe_queue_tgt_io(t, q, tag));

        return 0;
}

/*
 * tgt_io_done entry point: handle one completion of a target SQE.
 *
 * The tag, operation and stripe index are decoded from cqe->user_data.
 * The first non-zero result seen for the request is kept in io->result,
 * except that a successful buffer-unregister completion is ignored. A
 * READ/WRITE that transferred fewer bytes than its stripe expects fails
 * the request with -EIO. When the last outstanding target I/O of the tag
 * completes, the request is completed and its stripe state is freed.
 *
 * NOTE(review): because io->result keeps the first non-zero result, a
 * fully successful multi-stripe request appears to complete with the byte
 * count of whichever stripe finished first rather than with
 * iod->nr_sectors << 9 - confirm this is intended.
 */
static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q,
                                const struct io_uring_cqe *cqe)
{
        unsigned tag = user_data_to_tag(cqe->user_data);
        const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
        unsigned op = user_data_to_op(cqe->user_data);
        struct ublk_io *io = ublk_get_io(q, tag);
        int res = cqe->res;

        if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
                if (!io->result)
                        io->result = res;
                if (res < 0)
                        ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
        }

        /*
         * buffer register op is IOSQE_CQE_SKIP_SUCCESS, so it was not
         * counted when the request was queued; account for its CQE here.
         */
        if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
                io->tgt_ios += 1;

        /* fail short READ/WRITE simply */
        if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
                unsigned seq = user_data_to_tgt_data(cqe->user_data);
                struct stripe_array *s = io->private_data;
                /* bytes the whole stripe was expected to transfer */
                unsigned expected = s->s[seq].nr_sects << 9;

                /* negative results were already recorded above */
                if (res >= 0 && (unsigned)res < expected) {
                        io->result = -EIO;
                        ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
                                        __func__, op, res, expected, tag);
                }
        }

        if (ublk_completed_tgt_io(t, q, tag)) {
                int final = io->result;

                if (!final)
                        final = iod->nr_sectors << 9;

                ublk_complete_io(t, q, tag, final);

                free_stripe_array(io->private_data);
                io->private_data = NULL;
        }
}

/*
 * init_tgt entry point: validate the options and set up a stripe device.
 *
 * The chunk size must be a power of two between 4 KiB and 512 KiB. There
 * must be between 1 and NR_STRIPE backing files, and after rounding each
 * size down to a chunk multiple they must all be the same size. The device
 * size is the sum of the rounded sizes. The io_uring SQ/CQ depth is
 * queue_depth * nr_files, doubled when the device has
 * UBLK_F_SUPPORT_ZERO_COPY.
 *
 * On success dev->private_data holds a heap-allocated struct stripe_conf,
 * released by ublk_stripe_tgt_deinit().
 *
 * Returns 0 on success, -EINVAL for unsupported or invalid configuration,
 * -ENOMEM if the configuration cannot be allocated, or the error returned
 * by backing_file_tgt_init().
 *
 * NOTE(review): the error returns after backing_file_tgt_init() succeeded
 * do not undo it here - confirm that the caller handles that.
 */
static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
        struct ublk_params p = {
                .types = UBLK_PARAM_TYPE_BASIC,
                .basic = {
                        .attrs = UBLK_ATTR_VOLATILE_CACHE,
                        .logical_bs_shift       = 9,
                        .physical_bs_shift      = 12,
                        .io_opt_shift   = 12,
                        .io_min_shift   = 9,
                        .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
                },
        };
        unsigned chunk_size = ctx->stripe.chunk_size;
        struct stripe_conf *conf;
        unsigned chunk_shift;
        loff_t bytes = 0;
        int ret, i, mul = 1;

        if (ctx->auto_zc_fallback) {
                ublk_err("%s: not support auto_zc_fallback\n", __func__);
                return -EINVAL;
        }
        if (ctx->metadata_size) {
                ublk_err("%s: integrity not supported\n", __func__);
                return -EINVAL;
        }

        /* must be a non-zero power of two */
        if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
                ublk_err("invalid chunk size %u\n", chunk_size);
                return -EINVAL;
        }

        if (chunk_size < 4096 || chunk_size > 512 * 1024) {
                ublk_err("invalid chunk size %u\n", chunk_size);
                return -EINVAL;
        }

        chunk_shift = ilog2(chunk_size);

        ret = backing_file_tgt_init(dev, dev->tgt.nr_backing_files);
        if (ret)
                return ret;

        if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
                return -EINVAL;

        ublk_assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);

        /* round every backing file down to a whole number of chunks */
        for (i = 0; i < dev->tgt.nr_backing_files; i++)
                dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);

        /* all backing files must have the same usable size */
        for (i = 0; i < dev->tgt.nr_backing_files; i++) {
                unsigned long size = dev->tgt.backing_file_size[i];

                if (size != dev->tgt.backing_file_size[0])
                        return -EINVAL;
                bytes += size;
        }

        conf = malloc(sizeof(*conf));
        if (!conf)
                return -ENOMEM;
        conf->shift = chunk_shift;
        conf->nr_files = dev->tgt.nr_backing_files;

        dev->private_data = conf;
        dev->tgt.dev_size = bytes;
        p.basic.dev_sectors = bytes >> 9;
        dev->tgt.params = p;

        /* double the ring depth when the device supports zero copy */
        if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
                mul = 2;
        dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
        dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;

        printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);

        return 0;
}

/*
 * deinit_tgt entry point: free the stripe configuration kept in
 * dev->private_data, then call backing_file_tgt_deinit() for the device.
 */
static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
{
        free(dev->private_data);
        backing_file_tgt_deinit(dev);
}

/*
 * parse_cmd_line entry point: read the stripe-specific options.
 *
 * The only option is --chunk_size <bytes>, parsed as a decimal number.
 * ctx->stripe.chunk_size defaults to 65536 when the option is absent.
 * The value is not range-checked here.
 */
static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
{
        static const struct option longopts[] = {
                { "chunk_size",         1,      NULL,  0  },
                { 0, 0, 0, 0 }
        };
        int option_idx, opt;

        ctx->stripe.chunk_size = 65536;
        for (;;) {
                opt = getopt_long(argc, argv, "", longopts, &option_idx);
                if (opt == -1)
                        break;
                /* long options report 0; skip everything else */
                if (opt != 0)
                        continue;
                if (strcmp(longopts[option_idx].name, "chunk_size") == 0)
                        ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
        }
}

/* usage entry point: print the stripe target's option summary to stdout. */
static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
{
        fputs("\tstripe: [--chunk_size chunk_size (default 65536)]\n", stdout);
}

/* Operations table that exposes the "stripe" target under the name "stripe". */
const struct ublk_tgt_ops stripe_tgt_ops = {
        .name = "stripe",
        .init_tgt = ublk_stripe_tgt_init,
        .deinit_tgt = ublk_stripe_tgt_deinit,
        .queue_io = ublk_stripe_queue_io,
        .tgt_io_done = ublk_stripe_io_done,
        .parse_cmd_line = ublk_stripe_cmd_line,
        .usage = ublk_stripe_usage,
};