/* tools/testing/selftests/ublk/kublk.h */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H

#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>

/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>

#include "utils.h"

#define MAX_BACK_FILES   4

/****************** part 1: libublk ********************/

#define CTRL_DEV                "/dev/ublk-control"
#define UBLKC_DEV               "/dev/ublkc"
#define UBLKB_DEV               "/dev/ublkb"
#define UBLK_CTRL_RING_DEPTH            32
#define ERROR_EVTFD_DEVID       -2

#define UBLK_IO_MAX_BYTES               (1 << 20)
#define UBLK_MAX_QUEUES_SHIFT           5
#define UBLK_MAX_QUEUES                 (1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT          5
#define UBLK_MAX_THREADS                (1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH                1024

struct ublk_dev;
struct ublk_queue;
struct ublk_thread;

struct stripe_ctx {
        /* stripe */
        unsigned int    chunk_size;
};

struct fault_inject_ctx {
        /* fault_inject */
        unsigned long   delay_us;
};

/*
 * Device-wide context, mostly populated from the command line, used while
 * creating/running one ublk device.
 */
struct dev_ctx {
        char tgt_type[16];              /* target name, e.g. "null", "loop" */
        unsigned long flags;            /* ublk feature flags for the device */
        unsigned nr_hw_queues;
        unsigned short nthreads;        /* number of server I/O threads */
        unsigned queue_depth;
        int dev_id;                     /* ublk device id, or -1 for auto — TODO confirm */
        int nr_files;                   /* number of entries used in files[] */
        char *files[MAX_BACK_FILES];    /* backing file paths */
        /* boolean command line switches */
        unsigned int    logging:1;
        unsigned int    all:1;
        unsigned int    fg:1;
        unsigned int    recovery:1;
        unsigned int    auto_zc_fallback:1;
        unsigned int    per_io_tasks:1;
        unsigned int    no_ublk_fixed_fd:1;
        unsigned int    safe_stop:1;
        unsigned int    no_auto_part_scan:1;
        /* integrity/PI settings, consumed by ublk_set_integrity_params() */
        __u32 integrity_flags;
        __u8 metadata_size;
        __u8 pi_offset;
        __u8 csum_type;
        __u8 tag_size;

        /* internal plumbing between parent and daemon process — verify usage */
        int _evtfd;
        int _shmid;

        /* built from shmem, only for ublk_dump_dev() */
        struct ublk_dev *shadow_dev;

        /* for 'update_size' command */
        unsigned long long size;

        /* per-target options; which member is valid depends on tgt_type */
        union {
                struct stripe_ctx       stripe;
                struct fault_inject_ctx fault_inject;
        };
};

/* Describes one control command issued to /dev/ublk-control */
struct ublk_ctrl_cmd_data {
        __u32 cmd_op;
/* flags telling which of the fields below carry payload for this command */
#define CTRL_CMD_HAS_DATA       1
#define CTRL_CMD_HAS_BUF        2
        __u32 flags;

        __u64 data[2];          /* valid when CTRL_CMD_HAS_DATA */
        __u64 addr;             /* buffer address, valid when CTRL_CMD_HAS_BUF */
        __u32 len;              /* buffer length, valid when CTRL_CMD_HAS_BUF */
};

/* Per-tag I/O state kept by the server */
struct ublk_io {
        char *buf_addr;         /* data buffer for this tag */
        void *integrity_buf;    /* integrity metadata buffer, if any */

/* state bits kept in 'flags' below */
#define UBLKS_IO_NEED_FETCH_RQ          (1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP    (1UL << 1)
#define UBLKS_IO_FREE                   (1UL << 2)
#define UBLKS_IO_NEED_GET_DATA           (1UL << 3)
#define UBLKS_IO_NEED_REG_BUF            (1UL << 4)
        unsigned short flags;
        unsigned short refs;            /* used by target code only */

        int tag;                /* this io's tag; index into ublk_queue->ios[] */

        int result;             /* completion result, set via ublk_set_io_res() */

        unsigned short buf_index;       /* buffer index, see ublk_io_buf_idx() */
        unsigned short tgt_ios;         /* outstanding target sub-I/Os for this tag */
        void *private_data;             /* opaque, owned by the target */
};

/* Callbacks each target type (null/loop/stripe/fault_inject) implements */
struct ublk_tgt_ops {
        const char *name;
        /* set up target state/params when the device is created */
        int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
        void (*deinit_tgt)(struct ublk_dev *);

        /* queue one incoming I/O described by tag; returns how it was queued */
        int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
        /* completion of a target-generated SQE */
        void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
                            const struct io_uring_cqe *);

        /*
         * Target specific command line handling
         *
         * each option requires argument for target command line
         */
        void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
        void (*usage)(const struct ublk_tgt_ops *ops);

        /* return buffer index for UBLK_F_AUTO_BUF_REG */
        unsigned short (*buf_index)(const struct ublk_thread *t,
                        const struct ublk_queue *, int tag);
};

/* Target state attached to a ublk device */
struct ublk_tgt {
        unsigned long dev_size;         /* device size in bytes */
        /* io_uring SQ/CQ depths requested by the target — confirm at init_tgt */
        unsigned int  sq_depth;
        unsigned int  cq_depth;
        const struct ublk_tgt_ops *ops;
        struct ublk_params params;      /* parameters sent to the driver */

        int nr_backing_files;
        unsigned long backing_file_size[MAX_BACK_FILES];
        char backing_file[MAX_BACK_FILES][PATH_MAX];
};

/* One ublk hardware queue */
struct ublk_queue {
        int q_id;
        int q_depth;            /* bounded by UBLK_QUEUE_DEPTH */
        struct ublk_dev *dev;
        const struct ublk_tgt_ops *tgt_ops;
        struct ublksrv_io_desc *io_cmd_buf;     /* iod array indexed by tag, see ublk_get_iod() */

/* borrow three bit of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK   (1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD        (1ULL << 62)
#define UBLKS_Q_PREPARED        (1ULL << 61)
        __u64 flags;            /* ublk uapi flags plus the UBLKS_Q_* bits above */
        int ublk_fd;    /* cached ublk char device fd */
        __u8 metadata_size;     /* integrity metadata bytes per 512B interval */
        struct ublk_io ios[UBLK_QUEUE_DEPTH];

        /* used for prep io commands */
        pthread_spinlock_t lock;
};

/*
 * One element of a batch commit/prep buffer.
 * Layout must stay in sync with the kernel uapi `ublk_elem_header` —
 * do not reorder or resize fields.
 */
struct ublk_batch_elem {
        __u16 tag;
        __u16 buf_index;
        __s32 result;
        __u64 buf_addr;
};

/* Tracks the commit buffer currently being filled for one queue */
struct batch_commit_buf {
        unsigned short q_id;
        unsigned short buf_idx;         /* UBLKS_T_COMMIT_BUF_INV_IDX when none prepared */
        void *elem;                     /* element storage for this batch */
        unsigned short done;            /* elements filled so far */
        unsigned short count;           /* element capacity — verify against usage */
};

/* Provided-buffer state for multishot UBLK_U_IO_FETCH_IO_CMDS */
struct batch_fetch_buf {
        struct io_uring_buf_ring *br;   /* io_uring provided-buffer ring */
        void *fetch_buf;
        unsigned int fetch_buf_size;
        unsigned int fetch_buf_off;     /* current consume offset into fetch_buf */
};

/* Per-server-thread state; each thread owns one io_uring */
struct ublk_thread {
        /* Thread-local copy of queue-to-thread mapping for this thread */
        unsigned char q_map[UBLK_MAX_QUEUES];

        struct ublk_dev *dev;
        unsigned short idx;             /* this thread's index within the device */
        unsigned short nr_queues;       /* queues served by this thread */

/* state bits */
#define UBLKS_T_STOPPING        (1U << 0)
#define UBLKS_T_IDLE    (1U << 1)
#define UBLKS_T_BATCH_IO        (1U << 31)      /* readonly */
        unsigned state;
        unsigned int cmd_inflight;      /* outstanding ublk uring_cmds */
        unsigned int io_inflight;       /* outstanding target I/Os */

        unsigned short nr_bufs;

        /* followings are for BATCH_IO */
        unsigned short commit_buf_start;
        unsigned char  commit_buf_elem_size;
        /*
         * We just support single device, so pre-calculate commit/prep flags
         */
        unsigned short cmd_flags;
        unsigned int   nr_commit_buf;
        unsigned int   commit_buf_size;
        void *commit_buf;
#define UBLKS_T_COMMIT_BUF_INV_IDX  ((unsigned short)-1)
        struct allocator commit_buf_alloc;      /* allocator over commit buffers */
        struct batch_commit_buf *commit;
        /* FETCH_IO_CMDS buffer */
        unsigned short nr_fetch_bufs;
        struct batch_fetch_buf *fetch;

        struct io_uring ring;           /* this thread's io_uring instance */
};

/* One ublk device: target state, driver info, queues and fds */
struct ublk_dev {
        struct ublk_tgt tgt;
        struct ublksrv_ctrl_dev_info  dev_info;         /* info exchanged with the driver */
        struct ublk_queue q[UBLK_MAX_QUEUES];
        unsigned nthreads;              /* number of server threads */
        unsigned per_io_tasks;

        int fds[MAX_BACK_FILES + 1];    /* fds[0] points to /dev/ublkcN */
        int nr_fds;
        int ctrl_fd;                    /* fd of /dev/ublk-control */
        struct io_uring ring;           /* control-path ring */

        void *private_data;             /* opaque, owned by the target */
};

extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);

/*
 * Test UBLK_F_BATCH_IO in a device/queue flags word.
 *
 * Normalize with !! so the result survives the implicit __u64 -> int
 * conversion even if the flag bit ever sits above bit 31; this also matches
 * the other boolean predicates in this file.
 */
static inline int __ublk_use_batch_io(__u64 flags)
{
        return !!(flags & UBLK_F_BATCH_IO);
}

/* True when this queue operates in batch I/O mode */
static inline int ublk_queue_batch_io(const struct ublk_queue *q)
{
        const __u64 qflags = q->flags;

        return __ublk_use_batch_io(qflags);
}

/* True when the whole device operates in batch I/O mode */
static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
{
        const __u64 dflags = dev->dev_info.flags;

        return __ublk_use_batch_io(dflags);
}

/* only works when this pthread context handles a single device */
static inline int ublk_thread_batch_io(const struct ublk_thread *t)
{
        /*
         * UBLKS_T_BATCH_IO is bit 31: normalize with !! instead of returning
         * the raw masked value, whose unsigned -> int conversion is
         * implementation-defined.
         */
        return !!(t->state & UBLKS_T_BATCH_IO);
}

/*
 * Fill params->integrity from the command-line context.
 *
 * No-op unless a metadata size was requested.  The compound-literal
 * assignment deliberately replaces the whole struct, zeroing any members
 * not listed here.
 */
static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
                                             struct ublk_params *params)
{
        if (!ctx->metadata_size)
                return;

        params->types |= UBLK_PARAM_TYPE_INTEGRITY;
        params->integrity = (struct ublk_param_integrity) {
                .flags = ctx->integrity_flags,
                /* one metadata chunk per logical block */
                .interval_exp = params->basic.logical_bs_shift,
                .metadata_size = ctx->metadata_size,
                .pi_offset = ctx->pi_offset,
                .csum_type = ctx->csum_type,
                .tag_size = ctx->tag_size,
        };
}

/* Bytes of integrity metadata needed for 'len' bytes of data */
static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
{
        /* All targets currently use interval_exp = logical_bs_shift = 9 */
        const size_t nr_intervals = len >> 9;

        return nr_intervals * q->metadata_size;
}

/* Inverse of ublk_integrity_len(): data bytes covered by 'integrity_len' */
static inline size_t
ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
{
        const size_t nr_intervals = integrity_len / q->metadata_size;

        return nr_intervals << 9;
}

/* True when the driver asked us to register the buffer ourselves */
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
        return (iod->op_flags & UBLK_IO_F_NEED_REG_BUF) != 0;
}

/* pread/pwrite offset addressing (q_id, tag) in UBLK_F_USER_COPY mode */
static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
{
        __u64 off = (__u64)q_id << UBLK_QID_OFF;

        off |= (__u64)tag << UBLK_TAG_OFF;
        return UBLKSRV_IO_BUF_OFFSET + off;
}

/* Bit 63 of user_data marks target-generated SQEs, see build_user_data() */
static inline int is_target_io(__u64 user_data)
{
        return (int)(user_data >> 63);
}

/*
 * Pack cqe user_data: tag[0:16) | op[16:24) | tgt_data[24:40) |
 * q_id[56:63) | is_target_io[63].
 */
static inline __u64 build_user_data(unsigned tag, unsigned op,
                unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
        __u64 data;

        /* we only have 7 bits to encode q_id */
        _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
        ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));

        data = (__u64)tag;
        data |= (__u64)op << 16;
        data |= (__u64)tgt_data << 24;
        data |= (__u64)q_id << 56;
        data |= (__u64)is_target_io << 63;
        return data;
}

/* Extract the 16-bit tag packed by build_user_data() */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
        return (unsigned int)(user_data & 0xffffULL);
}

/* Extract the 8-bit op packed by build_user_data() */
static inline unsigned int user_data_to_op(__u64 user_data)
{
        return (unsigned int)((user_data >> 16) & 0xffULL);
}

static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
        return (user_data >> 24) & 0xffff;
}

/* Extract the 7-bit queue id packed by build_user_data() */
static inline unsigned int user_data_to_q_id(__u64 user_data)
{
        return (unsigned int)((user_data >> 56) & 0x7fULL);
}

/* Command number: the _IOC_NR field of an ioctl-encoded ublk opcode */
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
        const unsigned short nr = _IOC_NR(op);

        return nr;
}

/*
 * Map an io back to its owning queue.  Since io always lives at
 * q->ios[io->tag], container_of() with that exact array element recovers
 * the enclosing ublk_queue.
 */
static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
        return container_of(io, struct ublk_queue, ios[io->tag]);
}

/*
 * Grab nr_sqes SQEs from this thread's ring, submitting pending entries
 * first if the SQ can't hold them all.  Returns how many SQEs were
 * actually obtained (may be fewer than requested).
 */
static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
                struct io_uring_sqe *sqes[], int nr_sqes)
{
        struct io_uring *ring = &t->ring;
        int got = 0;

        if (io_uring_sq_space_left(ring) < (unsigned)nr_sqes)
                io_uring_submit(ring);

        while (got < nr_sqes) {
                struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

                if (!sqe)
                        break;
                sqes[got++] = sqe;
        }

        return got;
}

/*
 * Translate a logical fd index to what the SQE should carry: with
 * registered files this is the index itself; without, index 0 becomes the
 * raw ublk char-dev fd and backing-file indices shift down by one.
 */
static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
        if (!(q->flags & UBLKS_Q_NO_UBLK_FIXED_FD))
                return fd_index;

        return fd_index == 0 ? q->ublk_fd : fd_index - 1;
}

/*
 * Common SQE setup for UBLK_U_IO_REGISTER_IO_BUF / UNREGISTER_IO_BUF.
 * The ublksrv_io_cmd payload (tag/addr/q_id) is written after
 * io_uring_prep_read(), which initializes the generic sqe fields first;
 * the caller then sets sqe->cmd_op.
 */
static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
                struct ublk_queue *q, int tag, int q_id, __u64 index)
{
        struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
        int dev_fd = ublk_get_registered_fd(q, 0);

        io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
        sqe->opcode             = IORING_OP_URING_CMD;
        /* fixed-file flag must match whether dev_fd is a registered index */
        if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
                sqe->flags      &= ~IOSQE_FIXED_FILE;
        else
                sqe->flags      |= IOSQE_FIXED_FILE;

        cmd->tag                = tag;
        cmd->addr               = index;        /* buffer index, not an address, for these cmds */
        cmd->q_id               = q_id;
}

/* Prepare an SQE registering buffer 'index' for (q_id, tag) */
static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
                struct ublk_queue *q, int tag, int q_id, __u64 index)
{
        __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
        sqe->cmd_op             = UBLK_U_IO_REGISTER_IO_BUF;
}

/* Prepare an SQE unregistering buffer 'index' for (q_id, tag) */
static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
                struct ublk_queue *q, int tag, int q_id, __u64 index)
{
        __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
        sqe->cmd_op             = UBLK_U_IO_UNREGISTER_IO_BUF;
}

/* Pointer to the uring_cmd payload area inside the SQE */
static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
        return (void *)&sqe->cmd;
}

static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
        q->ios[tag].result = res;
}

static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
        return q->ios[tag].result;
}

/* Store the result and flag the io as done/free, ready to be committed */
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
        io->result = res;
        io->flags |= UBLKS_IO_NEED_COMMIT_RQ_COMP;
        io->flags |= UBLKS_IO_FREE;
}

/* I/O descriptor for 'tag' in this queue's shared descriptor array */
static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
        return q->io_cmd_buf + tag;
}

/*
 * Store the 32-bit uring_cmd opcode into the SQE.  cmd_op shares storage
 * with the 64-bit 'off' field, so write the opcode into the low 32 bits
 * and clear the high 32 bits.
 */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
        __u32 *addr = (__u32 *)&sqe->off;

        addr[0] = cmd_op;
        addr[1] = 0;
}

static inline unsigned short ublk_batch_io_buf_idx(
                const struct ublk_thread *t, const struct ublk_queue *q,
                unsigned tag);

/* Buffer index for (q, tag): computed per-thread in batch mode, stored otherwise */
static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
                                             const struct ublk_queue *q,
                                             unsigned tag)
{
        return ublk_queue_batch_io(q) ?
                ublk_batch_io_buf_idx(t, q, tag) : q->ios[tag].buf_index;
}

/* The io slot for 'tag' in this queue */
static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
        return q->ios + tag;
}

/*
 * Account one finished target sub-I/O; returns non-zero when this was the
 * last outstanding sub-I/O for the tag.
 */
static inline int ublk_completed_tgt_io(struct ublk_thread *t,
                                        struct ublk_queue *q, unsigned tag)
{
        struct ublk_io *io = ublk_get_io(q, tag);

        t->io_inflight -= 1;
        io->tgt_ios -= 1;
        return io->tgt_ios == 0;
}

/* True when the queue runs with UBLK_F_SUPPORT_ZERO_COPY */
static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
{
        return (q->flags & UBLK_F_SUPPORT_ZERO_COPY) != 0;
}

/* True when the queue runs with UBLK_F_AUTO_BUF_REG */
static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
        return (q->flags & UBLK_F_AUTO_BUF_REG) != 0;
}

/* True when the auto buf-reg fallback mode was selected for this queue */
static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
        return (q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK) != 0;
}

/* True when the queue runs with UBLK_F_USER_COPY */
static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
        return (q->flags & UBLK_F_USER_COPY) != 0;
}

/* True when no server-side data buffer is needed (either zero-copy mode) */
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
        if (ublk_queue_use_zc(q))
                return 1;
        return ublk_queue_use_auto_zc(q);
}

/* True when a commit buffer has been prepared for this batch */
static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
{
        if (cb->buf_idx == UBLKS_T_COMMIT_BUF_INV_IDX)
                return 0;
        return 1;
}

/*
 * Zero-based position of queue q among the queues served by thread t.
 * q_map entries are stored 1-based; 0 means "not served by this thread".
 */
static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
                                                const struct ublk_queue *q)
{
        const unsigned char slot = t->q_map[q->q_id];

        ublk_assert(slot != 0);
        return slot - 1;
}

/*
 * Each IO's buffer index has to be calculated by this helper for
 * UBLKS_T_BATCH_IO: buffers are laid out per thread as
 * [queue-slot * q_depth + tag].
 */
static inline unsigned short ublk_batch_io_buf_idx(
                const struct ublk_thread *t, const struct ublk_queue *q,
                unsigned tag)
{
        const unsigned qidx = ublk_queue_idx_in_thread(t, q);

        return qidx * q->q_depth + tag;
}

/* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
/* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
void ublk_batch_start_fetch(struct ublk_thread *t);
/* Handle completion of batch I/O commands (prep/commit) */
void ublk_batch_compl_cmd(struct ublk_thread *t,
                          const struct io_uring_cqe *cqe);
/* Initialize batch I/O state and calculate buffer parameters */
void ublk_batch_prepare(struct ublk_thread *t);
/* Allocate and register commit buffers for batch operations */
int ublk_batch_alloc_buf(struct ublk_thread *t);
/* Free commit buffers and cleanup batch allocator */
void ublk_batch_free_buf(struct ublk_thread *t);

/* Prepare a new commit buffer for batching completed I/O operations */
void ublk_batch_prep_commit(struct ublk_thread *t);
/* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
void ublk_batch_commit_io_cmds(struct ublk_thread *t);
/* Add a completed I/O operation to the current batch commit buffer */
void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
                            unsigned tag, int res);
void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
                           int nthreads, int queues);

/*
 * Complete the io identified by (q, tag) with result res: batched queues
 * collect it into the current commit buffer, otherwise a COMMIT uring_cmd
 * is queued immediately.
 */
static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
                                   unsigned tag, int res)
{
        struct ublk_io *io;

        if (ublk_queue_batch_io(q)) {
                ublk_batch_complete_io(t, q, tag, res);
                return 0;
        }

        io = &q->ios[tag];
        ublk_mark_io_done(io, res);
        return ublk_queue_io_cmd(t, io);
}

/*
 * Account the outcome of tgt_ops->queue_io(): a negative 'queued' is an
 * error completed right away; otherwise 'queued' sub-I/Os are now in
 * flight for this tag.
 */
static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
                                      unsigned tag, int queued)
{
        struct ublk_io *io;

        if (queued < 0) {
                ublk_complete_io(t, q, tag, queued);
                return;
        }

        io = ublk_get_io(q, tag);
        t->io_inflight += queued;
        io->tgt_ios = queued;
        io->result = 0;
}

extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;

void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);

#endif