#ifndef KUBLK_INTERNAL_H
#define KUBLK_INTERNAL_H
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <pthread.h>
#include <getopt.h>
#include <limits.h>
#include <poll.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
#include <liburing.h>
#include <semaphore.h>
#include "ublk_dep.h"
#include <linux/ublk_cmd.h>
#include "utils.h"
/* Upper bound on backing files; sizes the per-file arrays declared in this header. */
#define MAX_BACK_FILES 4
/*
 * Device node paths.
 * NOTE(review): UBLKC_DEV/UBLKB_DEV look like prefixes that get a device id
 * appended by their users -- confirm against the callers.
 */
#define CTRL_DEV "/dev/ublk-control"
#define UBLKC_DEV "/dev/ublkc"
#define UBLKB_DEV "/dev/ublkb"
/* NOTE(review): presumably the depth of the io_uring used for control commands -- confirm. */
#define UBLK_CTRL_RING_DEPTH 32
/* NOTE(review): sentinel device id; presumably reported through the eventfd on failure -- confirm. */
#define ERROR_EVTFD_DEVID -2
/* 1 MiB. NOTE(review): by its name, the largest I/O size in bytes -- confirm. */
#define UBLK_IO_MAX_BYTES (1 << 20)
/* Queue limit is 1 << 5 = 32; build_user_data() statically asserts the shift is <= 7. */
#define UBLK_MAX_QUEUES_SHIFT 5
#define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT)
/* Thread limit is 1 << 5 = 32. */
#define UBLK_MAX_THREADS_SHIFT 5
#define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT)
/* Number of struct ublk_io slots embedded in every struct ublk_queue. */
#define UBLK_QUEUE_DEPTH 1024
/*
 * Forward declarations: the structures and callback tables below refer to
 * each other by pointer before their full definitions appear.
 */
struct ublk_dev;
struct ublk_queue;
struct ublk_thread;
/*
 * Stripe-specific settings; stored in the anonymous union of struct dev_ctx.
 * NOTE(review): the unit of chunk_size is not visible in this header -- confirm.
 */
struct stripe_ctx {
unsigned int chunk_size;
};
/*
 * Fault-injection settings; stored in the anonymous union of struct dev_ctx.
 * NOTE(review): delay_us is presumably a delay in microseconds -- confirm.
 */
struct fault_inject_ctx {
unsigned long delay_us;
};
/*
 * Per-device context. Within this header it is read by
 * ublk_set_integrity_params() and handed to the init_tgt() and
 * parse_cmd_line() target callbacks.
 * NOTE(review): most fields look like parsed command-line options -- confirm
 * against the code that fills this structure in.
 */
struct dev_ctx {
/* Target type name; the buffer holds 16 bytes */
char tgt_type[16];
unsigned long flags;
unsigned nr_hw_queues;
unsigned short nthreads;
unsigned queue_depth;
int dev_id;
/* Number of entries used in files[] (at most MAX_BACK_FILES slots exist) */
int nr_files;
char *files[MAX_BACK_FILES];
/* Single-bit boolean options */
unsigned int logging:1;
unsigned int all:1;
unsigned int fg:1;
unsigned int recovery:1;
unsigned int auto_zc_fallback:1;
unsigned int per_io_tasks:1;
unsigned int no_ublk_fixed_fd:1;
unsigned int safe_stop:1;
unsigned int no_auto_part_scan:1;
/*
 * Integrity settings; copied into struct ublk_param_integrity by
 * ublk_set_integrity_params(), which does nothing when metadata_size is 0.
 */
__u32 integrity_flags;
__u8 metadata_size;
__u8 pi_offset;
__u8 csum_type;
__u8 tag_size;
/* NOTE(review): the leading underscore suggests internal bookkeeping (eventfd / SysV shm id) -- confirm */
int _evtfd;
int _shmid;
struct ublk_dev *shadow_dev;
unsigned long long size;
/* Target-specific settings; only one member is meaningful for a given target */
union {
struct stripe_ctx stripe;
struct fault_inject_ctx fault_inject;
};
};
/*
 * Description of one control command.
 * NOTE(review): CTRL_CMD_HAS_DATA / CTRL_CMD_HAS_BUF are presumably bits of
 * `flags` telling whether data[] and addr/len are populated -- confirm against
 * the code that consumes this structure.
 */
struct ublk_ctrl_cmd_data {
__u32 cmd_op;
#define CTRL_CMD_HAS_DATA 1
#define CTRL_CMD_HAS_BUF 2
__u32 flags;
__u64 data[2];
__u64 addr;
__u32 len;
};
/*
 * State of one I/O slot; struct ublk_queue embeds UBLK_QUEUE_DEPTH of these
 * in its ios[] array.
 */
struct ublk_io {
char *buf_addr;
void *integrity_buf;
/*
 * Bits for `flags`. ublk_mark_io_done() sets
 * UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE.
 */
#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKS_IO_FREE (1UL << 2)
#define UBLKS_IO_NEED_GET_DATA (1UL << 3)
#define UBLKS_IO_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs;
/* Used by ublk_io_to_queue() as this slot's index inside the owning queue's ios[] */
int tag;
/* Written by ublk_set_io_res() / ublk_mark_io_done(), reset to 0 by ublk_queued_tgt_io() */
int result;
/* Returned by ublk_io_buf_idx() for queues that do not use batch I/O */
unsigned short buf_index;
/* Set by ublk_queued_tgt_io(), decremented by ublk_completed_tgt_io() */
unsigned short tgt_ios;
void *private_data;
};
/*
 * Callback table implemented by each target (see the *_tgt_ops externs
 * declared near the end of this header).
 * NOTE(review): which callbacks are optional is not visible here -- confirm
 * against the callers before leaving any of them NULL.
 */
struct ublk_tgt_ops {
const char *name;
int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
void (*deinit_tgt)(struct ublk_dev *);
/* Handle the I/O identified by (queue, tag) on the given thread */
int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
/* Receives the io_uring completion entry for a target I/O */
void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
const struct io_uring_cqe *);
void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
void (*usage)(const struct ublk_tgt_ops *ops);
unsigned short (*buf_index)(const struct ublk_thread *t,
const struct ublk_queue *, int tag);
};
/* Target state embedded in struct ublk_dev as its `tgt` member. */
struct ublk_tgt {
unsigned long dev_size;
/* NOTE(review): presumably io_uring submission/completion queue depths -- confirm */
unsigned int sq_depth;
unsigned int cq_depth;
const struct ublk_tgt_ops *ops;
struct ublk_params params;
/* Number of entries used in the two per-file arrays below */
int nr_backing_files;
unsigned long backing_file_size[MAX_BACK_FILES];
char backing_file[MAX_BACK_FILES][PATH_MAX];
};
/* One ublk queue; struct ublk_dev embeds UBLK_MAX_QUEUES of these. */
struct ublk_queue {
int q_id;
int q_depth;
struct ublk_dev *dev;
const struct ublk_tgt_ops *tgt_ops;
/* Array of I/O descriptors indexed by tag (see ublk_get_iod()) */
struct ublksrv_io_desc *io_cmd_buf;
/*
 * Queue-local flag bits kept in the top of `flags`. The same word is also
 * tested against UBLK_F_* feature bits by the ublk_queue_use_*() helpers.
 */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62)
#define UBLKS_Q_PREPARED (1ULL << 61)
__u64 flags;
/* Returned by ublk_get_registered_fd() for index 0 when UBLKS_Q_NO_UBLK_FIXED_FD is set */
int ublk_fd;
/* Integrity bytes per 512-byte unit, as used by ublk_integrity_len() */
__u8 metadata_size;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
pthread_spinlock_t lock;
};
/*
 * One batch element: tag, buffer index, result and buffer address.
 * NOTE(review): presumably the per-I/O record placed in batch commit buffers;
 * the layout may have to match what the kernel expects -- confirm before
 * changing field order or widths.
 */
struct ublk_batch_elem {
__u16 tag;
__u16 buf_index;
__s32 result;
__u64 buf_addr;
};
/* Bookkeeping for one batch commit buffer (struct ublk_thread points to these via `commit`). */
struct batch_commit_buf {
unsigned short q_id;
/* Compared against UBLKS_T_COMMIT_BUF_INV_IDX by ublk_batch_commit_prepared() */
unsigned short buf_idx;
void *elem;
/* NOTE(review): presumably progress counters for the elements in `elem` -- confirm */
unsigned short done;
unsigned short count;
};
/* Bookkeeping for one batch fetch buffer (struct ublk_thread points to these via `fetch`). */
struct batch_fetch_buf {
/* io_uring provided-buffer ring */
struct io_uring_buf_ring *br;
void *fetch_buf;
unsigned int fetch_buf_size;
/* NOTE(review): presumably the current offset inside fetch_buf -- confirm */
unsigned int fetch_buf_off;
};
/* Per-thread state, including the thread's own io_uring instance. */
struct ublk_thread {
/*
 * Indexed by queue id. ublk_queue_idx_in_thread() asserts the entry is
 * non-zero and returns entry - 1, so entries are stored with a +1 bias.
 */
unsigned char q_map[UBLK_MAX_QUEUES];
struct ublk_dev *dev;
unsigned short idx;
unsigned short nr_queues;
/* Bits for `state`; ublk_thread_batch_io() tests UBLKS_T_BATCH_IO */
#define UBLKS_T_STOPPING (1U << 0)
#define UBLKS_T_IDLE (1U << 1)
#define UBLKS_T_BATCH_IO (1U << 31)
unsigned state;
unsigned int cmd_inflight;
/* Raised by ublk_queued_tgt_io(), lowered by ublk_completed_tgt_io() */
unsigned int io_inflight;
unsigned short nr_bufs;
unsigned short commit_buf_start;
unsigned char commit_buf_elem_size;
unsigned short cmd_flags;
unsigned int nr_commit_buf;
unsigned int commit_buf_size;
void *commit_buf;
/* Value of batch_commit_buf.buf_idx that ublk_batch_commit_prepared() treats as "not prepared" */
#define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
struct allocator commit_buf_alloc;
struct batch_commit_buf *commit;
unsigned short nr_fetch_bufs;
struct batch_fetch_buf *fetch;
/* SQEs are taken from this ring by ublk_io_alloc_sqes() */
struct io_uring ring;
};
/* One ublk device: target state, device info, queues and file descriptors. */
struct ublk_dev {
struct ublk_tgt tgt;
/* dev_info.flags is tested for UBLK_F_BATCH_IO by ublk_dev_batch_io() */
struct ublksrv_ctrl_dev_info dev_info;
struct ublk_queue q[UBLK_MAX_QUEUES];
unsigned nthreads;
unsigned per_io_tasks;
/*
 * One slot more than MAX_BACK_FILES.
 * NOTE(review): the extra slot is presumably for the ublk char device
 * itself -- confirm against the code that fills fds[].
 */
int fds[MAX_BACK_FILES + 1];
int nr_fds;
int ctrl_fd;
struct io_uring ring;
void *private_data;
};
/*
 * Defined outside this header. ublk_complete_io() calls it for queues that do
 * not use batch I/O, right after marking the io done, and returns its result.
 */
extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
/*
 * Test whether the batch-I/O feature bit is set in @flags.
 *
 * Returns 1 when UBLK_F_BATCH_IO is set and 0 otherwise. The masked 64-bit
 * value is collapsed with !! before being narrowed to int, so the result does
 * not depend on which bit position the flag occupies.
 */
static inline int __ublk_use_batch_io(__u64 flags)
{
	return !!(flags & UBLK_F_BATCH_IO);
}
static inline int ublk_queue_batch_io(const struct ublk_queue *q)
{
return __ublk_use_batch_io(q->flags);
}
static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
{
return __ublk_use_batch_io(dev->dev_info.flags);
}
/*
 * Report whether thread @t is in batch-I/O mode.
 *
 * Returns 1 when UBLKS_T_BATCH_IO is set in t->state and 0 otherwise.
 * UBLKS_T_BATCH_IO is bit 31 of an unsigned value; converting the raw masked
 * value to int would be an implementation-defined conversion, so it is
 * normalised with !! first.
 */
static inline int ublk_thread_batch_io(const struct ublk_thread *t)
{
	return !!(t->state & UBLKS_T_BATCH_IO);
}
static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
struct ublk_params *params)
{
if (!ctx->metadata_size)
return;
params->types |= UBLK_PARAM_TYPE_INTEGRITY;
params->integrity = (struct ublk_param_integrity) {
.flags = ctx->integrity_flags,
.interval_exp = params->basic.logical_bs_shift,
.metadata_size = ctx->metadata_size,
.pi_offset = ctx->pi_offset,
.csum_type = ctx->csum_type,
.tag_size = ctx->tag_size,
};
}
/*
 * Number of integrity bytes that accompany @len data bytes on queue @q:
 * q->metadata_size bytes for every whole 512-byte unit of @len.
 */
static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
{
	const size_t nr_512b_units = len >> 9;

	return nr_512b_units * q->metadata_size;
}
/*
 * Inverse of ublk_integrity_len(): number of data bytes covered by
 * @integrity_len bytes of integrity metadata on queue @q (512 data bytes per
 * whole q->metadata_size chunk).
 *
 * Returns 0 when the queue carries no metadata (q->metadata_size == 0)
 * instead of dividing by zero; this matches ublk_integrity_len(), which
 * yields 0 integrity bytes for such a queue.
 */
static inline size_t
ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
{
	if (!q->metadata_size)
		return 0;

	return (integrity_len / q->metadata_size) << 9;
}
/* Returns 1 if UBLK_IO_F_NEED_REG_BUF is set in iod->op_flags, else 0. */
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
{
	return (iod->op_flags & UBLK_IO_F_NEED_REG_BUF) != 0;
}
static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
{
return UBLKSRV_IO_BUF_OFFSET +
((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
}
/* Returns 1 if bit 63 of @user_data (the target-I/O marker set by build_user_data()) is set, else 0. */
static inline int is_target_io(__u64 user_data)
{
	const __u64 top_bit = user_data >> 63;

	return (int)top_bit;
}
static inline __u64 build_user_data(unsigned tag, unsigned op,
unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
_Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
(__u64)q_id << 56 | (__u64)is_target_io << 63;
}
/* Extract the tag (bits 0-15) from a user_data word. */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
	const __u64 tag_mask = 0xffff;

	return (unsigned int)(user_data & tag_mask);
}
/* Extract the op (bits 16-23) from a user_data word. */
static inline unsigned int user_data_to_op(__u64 user_data)
{
	const unsigned int op_shift = 16;
	const __u64 op_mask = 0xff;

	return (unsigned int)((user_data >> op_shift) & op_mask);
}
/* Extract the target data (bits 24-39) from a user_data word. */
static inline unsigned int user_data_to_tgt_data(__u64 user_data)
{
	const unsigned int tgt_data_shift = 24;
	const __u64 tgt_data_mask = 0xffff;

	return (unsigned int)((user_data >> tgt_data_shift) & tgt_data_mask);
}
/* Extract the queue id (bits 56-62) from a user_data word; bit 63 is excluded. */
static inline unsigned int user_data_to_q_id(__u64 user_data)
{
	/* Shift bit 63 out at the top, then bring bits 56-62 down to bit 0. */
	const __u64 without_top_bit = user_data << 1;

	return (unsigned int)(without_top_bit >> 57);
}
/* Return the command-number field of ioctl-encoded opcode @op, as extracted by _IOC_NR(). */
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	const unsigned int nr = _IOC_NR(op);

	return (unsigned short)nr;
}
static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
{
return container_of(io, struct ublk_queue, ios[io->tag]);
}
/*
 * Take @nr_sqes submission entries from the thread's ring into @sqes.
 *
 * If the ring reports fewer free slots than requested, it is submitted once
 * before the entries are taken. Returns @nr_sqes on success; if
 * io_uring_get_sqe() returns NULL part-way, returns the number of entries
 * obtained so far (the failing slot in @sqes is left NULL).
 */
static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	struct io_uring *ring = &t->ring;
	int got = 0;

	if (io_uring_sq_space_left(ring) < (unsigned)nr_sqes)
		io_uring_submit(ring);

	while (got < nr_sqes) {
		sqes[got] = io_uring_get_sqe(ring);
		if (!sqes[got])
			return got;
		got++;
	}

	return nr_sqes;
}
/*
 * Translate @fd_index for queue @q.
 *
 * Without UBLKS_Q_NO_UBLK_FIXED_FD the index is returned unchanged. With it,
 * index 0 maps to q->ublk_fd and every other index is shifted down by one.
 */
static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
	if (!(q->flags & UBLKS_Q_NO_UBLK_FIXED_FD))
		return fd_index;

	return fd_index ? fd_index - 1 : q->ublk_fd;
}
/*
 * Common setup for the buffer register/unregister uring commands.
 *
 * The SQE is first initialised through io_uring_prep_read() against the fd
 * obtained from ublk_get_registered_fd(q, 0), then turned into an
 * IORING_OP_URING_CMD. IOSQE_FIXED_FILE is cleared when the queue has
 * UBLKS_Q_NO_UBLK_FIXED_FD and set otherwise. Finally the command payload in
 * sqe->cmd receives @q_id, @tag and @index (stored in its addr field).
 *
 * The payload is written last, after the generic prep call has run.
 */
static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
		struct ublk_queue *q, int tag, int q_id, __u64 index)
{
	const int dev_fd = ublk_get_registered_fd(q, 0);
	struct ublksrv_io_cmd *cmd;

	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode = IORING_OP_URING_CMD;

	if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
		sqe->flags &= ~IOSQE_FIXED_FILE;
	else
		sqe->flags |= IOSQE_FIXED_FILE;

	cmd = (struct ublksrv_io_cmd *)sqe->cmd;
	cmd->q_id = q_id;
	cmd->tag = tag;
	cmd->addr = index;
}
/*
 * Prepare @sqe as a UBLK_U_IO_REGISTER_IO_BUF uring command for (@q_id, @tag),
 * passing @index in the command's addr field.
 */
static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
struct ublk_queue *q, int tag, int q_id, __u64 index)
{
__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
/* Set after the common prep, which re-initialises the SQE via io_uring_prep_read() */
sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
}
/*
 * Prepare @sqe as a UBLK_U_IO_UNREGISTER_IO_BUF uring command for (@q_id, @tag),
 * passing @index in the command's addr field.
 */
static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
struct ublk_queue *q, int tag, int q_id, __u64 index)
{
__io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
/* Set after the common prep, which re-initialises the SQE via io_uring_prep_read() */
sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
}
/* Return a writable pointer to the command area (sqe->cmd) of @sqe; the const qualifier is dropped. */
static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	const void *cmd_area = &sqe->cmd;

	return (void *)cmd_area;
}
static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
q->ios[tag].result = res;
}
static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
return q->ios[tag].result;
}
/*
 * Record @res as the result of @io and flag the slot with both
 * UBLKS_IO_NEED_COMMIT_RQ_COMP and UBLKS_IO_FREE.
 */
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->result = res;
	io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
}
static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
return &q->io_cmd_buf[tag];
}
/*
 * Store @cmd_op in the SQE and zero the 32-bit pad that follows it.
 *
 * cmd_op and __pad1 share storage with the 64-bit sqe->off, so together the
 * two stores overwrite all eight bytes of that field. The named union
 * members are used instead of writing through a __u32 pointer cast of
 * &sqe->off, which accessed a __u64 object through an incompatible type.
 */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	sqe->cmd_op = cmd_op;
	sqe->__pad1 = 0;
}
/*
 * Forward declaration: ublk_io_buf_idx() below calls this helper before its
 * definition, which appears later in this header.
 */
static inline unsigned short ublk_batch_io_buf_idx(
const struct ublk_thread *t, const struct ublk_queue *q,
unsigned tag);
/*
 * Buffer index for the I/O (@q, @tag) as seen from thread @t: the slot's
 * stored buf_index for ordinary queues, or the value computed by
 * ublk_batch_io_buf_idx() when the queue uses batch I/O.
 */
static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
					     const struct ublk_queue *q,
					     unsigned tag)
{
	if (!ublk_queue_batch_io(q))
		return q->ios[tag].buf_index;

	return ublk_batch_io_buf_idx(t, q, tag);
}
/* Return the I/O slot for @tag on queue @q. No bounds check is performed. */
static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return q->ios + tag;
}
/*
 * Account for one completed target I/O belonging to (@q, @tag).
 *
 * Decrements the thread's io_inflight counter and the slot's tgt_ios counter.
 * Returns 1 when tgt_ios reached zero with this completion, else 0.
 */
static inline int ublk_completed_tgt_io(struct ublk_thread *t,
					struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	t->io_inflight--;
	io->tgt_ios--;

	return io->tgt_ios == 0;
}
/* True when UBLK_F_SUPPORT_ZERO_COPY is set in the queue's flags. */
static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
{
	return (q->flags & UBLK_F_SUPPORT_ZERO_COPY) != 0;
}
/* True when UBLK_F_AUTO_BUF_REG is set in the queue's flags. */
static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
{
	return (q->flags & UBLK_F_AUTO_BUF_REG) != 0;
}
/* True when the queue-local UBLKS_Q_AUTO_BUF_REG_FALLBACK bit is set in the queue's flags. */
static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
{
	return (q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK) != 0;
}
/* True when UBLK_F_USER_COPY is set in the queue's flags. */
static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
{
	return (q->flags & UBLK_F_USER_COPY) != 0;
}
/* Returns 1 when the queue uses zero copy or auto buffer registration, else 0. */
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	if (ublk_queue_use_zc(q))
		return 1;

	return ublk_queue_use_auto_zc(q);
}
/* Returns 1 when @cb holds a buffer index other than UBLKS_T_COMMIT_BUF_INV_IDX, else 0. */
static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
{
	const unsigned short cur_idx = cb->buf_idx;

	return cur_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
}
/*
 * Index of queue @q within thread @t.
 *
 * t->q_map[] stores the index plus one, keyed by queue id; a zero entry is
 * rejected by the assertion.
 */
static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
						const struct ublk_queue *q)
{
	const unsigned char idx = t->q_map[q->q_id];

	ublk_assert(idx != 0);

	return (unsigned)(idx - 1);
}
/*
 * Buffer index for (@q, @tag) in batch mode: the queue's index within
 * thread @t times the queue depth, plus the tag. The result is truncated to
 * unsigned short by the return type.
 */
static inline unsigned short ublk_batch_io_buf_idx(
		const struct ublk_thread *t, const struct ublk_queue *q,
		unsigned tag)
{
	const unsigned q_slot = ublk_queue_idx_in_thread(t, q);
	const unsigned base = q_slot * q->q_depth;

	return base + tag;
}
/*
 * Batch-I/O helpers implemented outside this header. Within this header only
 * ublk_batch_complete_io() is used: ublk_complete_io() calls it for queues
 * that have the batch-I/O feature bit set.
 */
int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
void ublk_batch_start_fetch(struct ublk_thread *t);
void ublk_batch_compl_cmd(struct ublk_thread *t,
const struct io_uring_cqe *cqe);
void ublk_batch_prepare(struct ublk_thread *t);
int ublk_batch_alloc_buf(struct ublk_thread *t);
void ublk_batch_free_buf(struct ublk_thread *t);
void ublk_batch_prep_commit(struct ublk_thread *t);
void ublk_batch_commit_io_cmds(struct ublk_thread *t);
void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
unsigned tag, int res);
/* NOTE(review): q_thread_map rows have the same shape as ublk_thread.q_map -- presumably one row per thread; confirm. */
void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
int nthreads, int queues);
/*
 * Complete the I/O (@q, @tag) with result @res.
 *
 * Batch-I/O queues hand the completion to ublk_batch_complete_io() and
 * return 0. Other queues mark the slot done and return whatever
 * ublk_queue_io_cmd() returns for it.
 */
static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
				   unsigned tag, int res)
{
	struct ublk_io *io;

	if (ublk_queue_batch_io(q)) {
		ublk_batch_complete_io(t, q, tag, res);
		return 0;
	}

	io = &q->ios[tag];
	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(t, io);
}
/*
 * Record the outcome of queueing target I/O for (@q, @tag).
 *
 * A negative @queued is treated as an error code and completes the I/O
 * immediately with that value. Otherwise @queued is added to the thread's
 * io_inflight counter, stored as the slot's outstanding tgt_ios count, and
 * the slot's result is reset to 0.
 */
static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
				      unsigned tag, int queued)
{
	struct ublk_io *io;

	if (queued < 0) {
		ublk_complete_io(t, q, tag, queued);
		return;
	}

	io = ublk_get_io(q, tag);
	t->io_inflight += queued;
	io->tgt_ios = queued;
	io->result = 0;
}
/* Target callback tables, defined outside this header (one per target type). */
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
extern const struct ublk_tgt_ops fault_inject_tgt_ops;
/*
 * Backing-file helpers, defined outside this header.
 * NOTE(review): the meaning of nr_direct is not visible here -- confirm
 * against the definition.
 */
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);
#endif