root/drivers/infiniband/ulp/rtrs/rtrs-pri.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */

#ifndef RTRS_PRI_H
#define RTRS_PRI_H

#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>

#include "rtrs.h"

#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
                               __stringify(RTRS_PROTO_VER_MINOR)

/*
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
 * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65535 and it depends on the system.
 *
 * NOTE(review): MIN_CHUNK_SIZE in this header is 8192, not 4096 -- confirm
 * which chunk size this derivation is meant to use.
 */
#define MAX_SESS_QUEUE_DEPTH 65535

/*
 * Split of the 32-bit immediate value into a type field and a payload
 * field: widths in bits, plus an all-ones mask of each width.
 */
enum rtrs_imm_const {
        /* field widths, in bits */
        MAX_IMM_TYPE_BITS = 4,
        MAX_IMM_PAYL_BITS = 28,

        /* masks covering exactly that many low bits */
        MAX_IMM_TYPE_MASK = (1 << MAX_IMM_TYPE_BITS) - 1,
        MAX_IMM_PAYL_MASK = (1 << MAX_IMM_PAYL_BITS) - 1,
};

/* Message type values carried in the type field of an immediate value. */
enum rtrs_imm_type {
        /* I/O: the request goes client -> server, responses server -> client */
        RTRS_IO_REQ_IMM       = 0,
        RTRS_IO_RSP_IMM       = 1,
        RTRS_IO_RSP_W_INV_IMM = 2,

        /* HB (HeartBeat) message and its acknowledgement */
        RTRS_HB_MSG_IMM       = 8,
        RTRS_HB_ACK_IMM       = 9,

        /* one past the highest type value defined above */
        RTRS_LAST_IMM,
};

/* Miscellaneous RTRS limits, heartbeat timing and protocol identification. */
enum {
        /* queue, path and chunk limits */
        SERVICE_CON_QUEUE_DEPTH = 512,
        MAX_PATHS_NUM = 128,
        MIN_CHUNK_SIZE = 8192,

        /* heartbeat timing constants */
        RTRS_HB_INTERVAL_MS = 5000,
        RTRS_HB_MISSED_MAX = 5,

        /* magic, and the version packed as major << 8 | minor */
        RTRS_MAGIC = 0x1BBD,
        RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};

/* Forward declaration; the full definition appears further down. */
struct rtrs_ib_dev;

/**
 * struct rtrs_rdma_dev_pd_ops - callbacks attached to a device pool
 * @init:   callback taking the device, returns an int status
 * @deinit: callback taking the device, returns nothing
 *
 * NOTE(review): presumably called when a device enters / leaves the pool --
 * confirm against the implementation in rtrs.c.
 */
struct rtrs_rdma_dev_pd_ops {
        int (*init)(struct rtrs_ib_dev *dev);
        void (*deinit)(struct rtrs_ib_dev *dev);
};

/**
 * struct rtrs_rdma_dev_pd - device pool handed to rtrs_ib_dev_find_or_add()
 * @mutex:    lock; presumably serialises access to @list -- confirm in rtrs.c
 * @list:     list head; presumably chains struct rtrs_ib_dev.entry -- confirm
 * @pd_flags: IB protection domain flags
 * @ops:      callback table, see struct rtrs_rdma_dev_pd_ops
 */
struct rtrs_rdma_dev_pd {
        struct mutex            mutex;
        struct list_head        list;
        enum ib_pd_flags        pd_flags;
        const struct rtrs_rdma_dev_pd_ops *ops;
};

/**
 * struct rtrs_ib_dev - RTRS wrapper around an IB device
 * @ib_dev:        the underlying IB device
 * @ib_pd:         protection domain associated with this wrapper
 * @ref:           reference count (kref)
 * @entry:         list linkage; presumably into struct rtrs_rdma_dev_pd.list
 *                 -- confirm in rtrs.c
 * @pool:          the device pool this wrapper points back to
 * @event_handler: IB event handler embedded in the wrapper
 */
struct rtrs_ib_dev {
        struct ib_device         *ib_dev;
        struct ib_pd             *ib_pd;
        struct kref              ref;
        struct list_head         entry;
        struct rtrs_rdma_dev_pd *pool;
        struct ib_event_handler  event_handler;
};

/**
 * struct rtrs_con - one RTRS connection
 * @path:        path this connection belongs to
 * @qp:          IB queue pair of the connection
 * @cq:          IB completion queue of the connection
 * @cm_id:       RDMA CM identifier of the connection
 * @cid:         connection id
 * @nr_cqe:      number of CQ entries (same name as the rtrs_cq_qp_create()
 *               argument; presumably stored from it -- confirm)
 * @wr_cnt:      atomic counter; NOTE(review): presumably counts posted work
 *               requests -- confirm in the users
 * @sq_wr_avail: atomic counter; NOTE(review): presumably free send-queue
 *               slots -- confirm in the users
 */
struct rtrs_con {
        struct rtrs_path        *path;
        struct ib_qp            *qp;
        struct ib_cq            *cq;
        struct rdma_cm_id       *cm_id;
        unsigned int            cid;
        int                     nr_cqe;
        atomic_t                wr_cnt;
        atomic_t                sq_wr_avail;
};

/**
 * struct rtrs_path - state of one RTRS path
 * @entry:           list linkage (the owning list is not visible in this header)
 * @dst_addr:        destination socket address
 * @src_addr:        source socket address
 * @sessname:        session name, buffer of NAME_MAX bytes
 * @uuid:            UUID stored for this path
 * @con:             array of pointers to connections
 * @con_num:         presumably the number of entries in @con -- confirm
 * @irq_con_num:     NOTE(review): meaning not visible here -- confirm in users
 * @recon_cnt:       reconnection counter
 * @signal_interval: NOTE(review): meaning not visible here -- confirm in users
 * @dev:             RTRS IB device wrapper used by the path
 * @dev_ref:         NOTE(review): looks like a reference counter tied to @dev
 *                   -- confirm in users
 * @hb_cqe:          heartbeat completion entry
 * @hb_err_handler:  callback taking a connection; presumably invoked on
 *                   heartbeat failure -- confirm in rtrs.c
 * @hb_wq:           workqueue for @hb_dwork (presumably) -- confirm
 * @hb_dwork:        delayed work item for the heartbeat
 * @hb_interval_ms:  heartbeat interval in milliseconds
 * @hb_missed_cnt:   counter of missed heartbeats
 * @hb_missed_max:   limit for @hb_missed_cnt
 * @hb_last_sent:    timestamp; by its name, when the last heartbeat was sent
 * @hb_cur_latency:  time value; by its name, the current heartbeat latency
 */
struct rtrs_path {
        struct list_head        entry;
        struct sockaddr_storage dst_addr;
        struct sockaddr_storage src_addr;
        char                    sessname[NAME_MAX];
        uuid_t                  uuid;
        struct rtrs_con **con;
        unsigned int            con_num;
        unsigned int            irq_con_num;
        unsigned int            recon_cnt;
        unsigned int            signal_interval;
        struct rtrs_ib_dev      *dev;
        int                     dev_ref;
        struct ib_cqe           *hb_cqe;
        void                    (*hb_err_handler)(struct rtrs_con *con);
        struct workqueue_struct *hb_wq;
        struct delayed_work     hb_dwork;
        unsigned int            hb_interval_ms;
        unsigned int            hb_missed_cnt;
        unsigned int            hb_missed_max;
        ktime_t                 hb_last_sent;
        ktime_t                 hb_cur_latency;
};

/**
 * struct rtrs_iu - rtrs information unit
 * @cqe:       completion queue entry embedded in the unit
 * @dma_addr:  DMA address; presumably the mapping of @buf -- confirm in rtrs.c
 * @buf:       data buffer
 * @size:      size in bytes; presumably of @buf -- confirm in rtrs.c
 * @direction: DMA data direction of the unit
 */
struct rtrs_iu {
        struct ib_cqe           cqe;
        dma_addr_t              dma_addr;
        void                    *buf;
        size_t                  size;
        enum dma_data_direction direction;
};

/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ: additional info request, client to server
 * @RTRS_MSG_INFO_RSP: additional info response, server to client
 * @RTRS_MSG_WRITE:    client writes data per RDMA to the server
 * @RTRS_MSG_READ:     client requests a data transfer from the server
 * @RTRS_MSG_RKEY_RSP: server refreshed the rkey for an rbuf
 *
 * The numeric values are spelled out because they are stored in the
 * little-endian @type field of the message structures.
 */
enum rtrs_msg_types {
        RTRS_MSG_INFO_REQ = 0,
        RTRS_MSG_INFO_RSP = 1,
        RTRS_MSG_WRITE    = 2,
        RTRS_MSG_READ     = 3,
        RTRS_MSG_RKEY_RSP = 4,
};

/**
 * enum rtrs_msg_flags - RTRS message flags (single-bit values).
 * @RTRS_MSG_NEED_INVAL_F: bit 0, send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:   bit 1, send refreshed rkey in response.
 */
enum rtrs_msg_flags {
        RTRS_MSG_NEED_INVAL_F = 0x1,
        RTRS_MSG_NEW_RKEY_F   = 0x2,
};

/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr:       Address of RDMA destination buffer
 * @key:        Authorization rkey to write to the buffer
 * @len:        Size of the buffer
 *
 * All members are little-endian; the three of them occupy 8 + 4 + 4 bytes.
 */
struct rtrs_sg_desc {
        __le64                  addr;
        __le32                  key;
        __le32                  len;
};

/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @magic:         RTRS magic
 * @version:       RTRS protocol version
 * @cid:           Current connection id
 * @cid_num:       Number of connections per session
 * @recon_cnt:     Reconnections counter
 * @sess_uuid:     UUID of a session (path)
 * @paths_uuid:    UUID of a group of sessions (paths)
 * @first_conn:    %1 if the connection request is the first for that session,
 *                      otherwise %0
 *
 * The trailing reserved bits and reserved bytes are private padding.
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
        /**
         * @__cma_version: Is set to 0 by cma.c in case of AF_IB, do not touch
         * that. See https://www.spinics.net/lists/linux-rdma/msg22397.html
         */
        u8              __cma_version;
        /**
         * @__ip_version: On the sender side this should be set to 0, otherwise
         * cma_save_ip_info() extracts garbage and fails.
         */
        u8              __ip_version;
        __le16          magic;
        __le16          version;
        __le16          cid;
        __le16          cid_num;
        __le16          recon_cnt;
        uuid_t          sess_uuid;
        uuid_t          paths_uuid;
        u8              first_conn : 1;
        /* private: */
        u8              reserved_bits : 7;
        u8              reserved[11];
};

/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic:         RTRS magic
 * @version:       RTRS protocol version
 * @errno:         If rdma_accept() then 0, if rdma_reject() indicates error
 * @queue_depth:   max inflight messages (queue-depth) in this session
 * @max_io_size:   max io size server supports
 * @max_hdr_size:  max msg header size server supports
 * @flags:         RTRS message flags for this message
 *
 * Layout: four le16 members (8 bytes), three le32 members (12 bytes) and
 * 36 private reserved bytes.
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
        __le16          magic;
        __le16          version;
        __le16          errno;
        __le16          queue_depth;
        __le32          max_io_size;
        __le32          max_hdr_size;
        __le32          flags;
        /* private: */
        u8              reserved[36];
};

/**
 * struct rtrs_msg_info_req - client additional info request
 * @type:               @RTRS_MSG_INFO_REQ
 * @pathname:           Path name chosen by client, buffer of NAME_MAX bytes
 *
 * The trailing 15 reserved bytes are private padding.
 */
struct rtrs_msg_info_req {
        __le16          type;
        u8              pathname[NAME_MAX];
        /* private: */
        u8              reserved[15];
};

/**
 * struct rtrs_msg_info_rsp - server additional info response
 * @type:               @RTRS_MSG_INFO_RSP
 * @sg_cnt:             Number of @desc entries
 * @desc:               RDMA buffers where the client can write to server;
 *                      flexible array member sized by @sg_cnt
 *
 * Four private reserved bytes sit between @sg_cnt and @desc.
 */
struct rtrs_msg_info_rsp {
        __le16          type;
        __le16          sg_cnt;
        /* private: */
        u8              reserved[4];
        /* public: */
        struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rkey_rsp - server refreshed rkey response
 * @type:               @RTRS_MSG_RKEY_RSP
 * @buf_id:             RDMA buf_id of the new rkey
 * @rkey:               new remote key for RDMA buffers id from server
 *
 * All members are little-endian: two le16 followed by one le32.
 */
struct rtrs_msg_rkey_rsp {
        __le16          type;
        __le16          buf_id;
        __le32          rkey;
};

/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type:               always @RTRS_MSG_READ
 * @usr_len:            length of user payload
 * @flags:              RTRS message flags (enum rtrs_msg_flags)
 * @sg_cnt:             number of @desc entries
 * @desc:               RDMA buffers where the server can write the result to;
 *                      flexible array member sized by @sg_cnt
 */
struct rtrs_msg_rdma_read {
        __le16                  type;
        __le16                  usr_len;
        __le16                  flags;
        __le16                  sg_cnt;
        struct rtrs_sg_desc    desc[];
};

/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type:               always @RTRS_MSG_WRITE
 * @usr_len:            length of user payload
 *
 * Both members are little-endian 16-bit values.
 */
struct rtrs_msg_rdma_write {
        __le16                  type;
        __le16                  usr_len;
};

/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type:               @RTRS_MSG_WRITE | @RTRS_MSG_READ
 *
 * Holds only the little-endian @type member, which is also the first member
 * of struct rtrs_msg_rdma_read and struct rtrs_msg_rdma_write.
 */
struct rtrs_msg_rdma_hdr {
        __le16                  type;
};

/* rtrs.c */

/**
 * rtrs_iu_alloc() - allocate rtrs information units (implemented in rtrs.c)
 * @queue_num: count argument; rtrs_iu_free() takes a matching @queue_num
 *             (presumably the number of units -- confirm in rtrs.c)
 * @size:      size argument (presumably bytes per unit buffer -- confirm)
 * @t:         allocation flags
 * @dev:       IB device
 * @dir:       DMA data direction
 * @done:      completion callback receiving the CQ and the work completion
 *
 * Return: pointer to struct rtrs_iu. NOTE(review): the failure value is not
 * visible in this header -- confirm in rtrs.c.
 */
struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
                              struct ib_device *dev,
                              enum dma_data_direction dir,
                              void (*done)(struct ib_cq *cq, struct ib_wc *wc));
/*
 * Release and posting helpers for struct rtrs_iu; implemented in rtrs.c.
 * The posting functions return an int status (NOTE(review): presumably 0 or
 * a negative errno -- confirm in rtrs.c).
 */
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
                      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
                                struct ib_sge *sge, unsigned int num_sge,
                                u32 rkey, u64 rdma_addr, u32 imm_data,
                                enum ib_send_flags flags,
                                struct ib_send_wr *head,
                                struct ib_send_wr *tail);

/* Receive posting that takes only a completion entry, no rtrs_iu. */
int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);

/*
 * CQ/QP creation and destruction for one connection; implemented in rtrs.c.
 * rtrs_cq_qp_create() returns an int status (NOTE(review): presumably 0 or a
 * negative errno -- confirm in rtrs.c).
 */
int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
                      u32 max_send_sge, int cq_vector, int nr_cqe,
                      u32 max_send_wr, u32 max_recv_wr,
                      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

/*
 * Heartbeat (HB) control for a path; implemented in rtrs.c.
 * The rtrs_init_hb() arguments mirror the hb_* members of struct rtrs_path
 * (presumably stored there -- confirm in rtrs.c).
 */
void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
                  unsigned int interval_ms, unsigned int missed_max,
                  void (*err_handler)(struct rtrs_con *con),
                  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);

/* Set-up and tear-down of a device pool; implemented in rtrs.c. */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
                           struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

/*
 * Look-up/creation and release of the RTRS wrapper for an IB device.
 * NOTE(review): the meaning of the int returned by rtrs_ib_dev_put() is not
 * visible in this header -- confirm in rtrs.c.
 */
struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
                                            struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);

/**
 * rtrs_to_imm() - pack a type and a payload into one 32-bit immediate value
 * @type:    message type; only the low MAX_IMM_TYPE_BITS bits are kept
 * @payload: payload; only the low MAX_IMM_PAYL_BITS bits are kept
 *
 * Return: the type placed above the payload bits, OR-ed with the payload.
 */
static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
        u32 type_part, payl_part;

        /* the two fields must fill the word, and every type must fit */
        BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
        BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));

        type_part = (type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS;
        payl_part = payload & MAX_IMM_PAYL_MASK;

        return type_part | payl_part;
}

/**
 * rtrs_from_imm() - split a 32-bit immediate value into type and payload
 * @imm:     immediate value to decode
 * @type:    output, receives the bits above the payload field
 * @payload: output, receives the low MAX_IMM_PAYL_BITS bits
 */
static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
        const u32 payl_part = imm & MAX_IMM_PAYL_MASK;
        const u32 type_part = imm >> MAX_IMM_PAYL_BITS;

        /* payload is stored first, then type */
        *payload = payl_part;
        *type = type_part;
}

/**
 * rtrs_to_io_req_imm() - build the immediate value for an I/O request
 * @addr: value placed in the payload field
 *
 * Return: rtrs_to_imm() of type RTRS_IO_REQ_IMM and @addr.
 */
static inline u32 rtrs_to_io_req_imm(u32 addr)
{
        const u32 imm = rtrs_to_imm(RTRS_IO_REQ_IMM, addr);

        return imm;
}

/**
 * rtrs_to_io_rsp_imm() - build the immediate value for an I/O response
 * @msg_id:  message id; only the low 19 bits are kept
 * @errno:   error code; its absolute value is masked to 9 bits, so a
 *           magnitude above 511 loses its upper bits
 * @w_inval: selects RTRS_IO_RSP_W_INV_IMM instead of RTRS_IO_RSP_IMM
 *
 * Return: immediate value with the error bits above the message id bits.
 */
static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
        /* payload layout: [27:19] errno magnitude, [18:0] msg_id */
        const u32 err_bits = abs(errno) & 0x1ff;
        const u32 id_bits = msg_id & 0x7ffff;
        const u32 payload = err_bits << 19 | id_bits;

        if (w_inval)
                return rtrs_to_imm(RTRS_IO_RSP_W_INV_IMM, payload);

        return rtrs_to_imm(RTRS_IO_RSP_IMM, payload);
}

/**
 * rtrs_from_io_rsp_imm() - decode the payload of an I/O response immediate
 * @payload: payload field of the immediate value
 * @msg_id:  output, receives the low 19 bits
 * @errno:   output, receives the negated 9-bit value stored above them
 */
static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
        /* payload layout: [27:19] errno magnitude, [18:0] msg_id */
        const u32 id_bits = payload & 0x7ffff;
        const u32 err_bits = (payload >> 19) & 0x1ff;

        *msg_id = id_bits;
        *errno = -(int)err_bits;
}

/*
 * STAT_STORE_FUNC() - define the sysfs store handler <set_value>_store().
 *
 * The handler finds the @type object that embeds the kobject as kobj_stats
 * and calls @reset on it with true for input "1" and false for input "0".
 * Any other input yields -EINVAL; a non-zero @reset result is returned
 * as is, otherwise the handler returns @count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)                         \
static ssize_t set_value##_store(struct kobject *kobj,                  \
                                 struct kobj_attribute *attr,           \
                                 const char *buf, size_t count)         \
{                                                                       \
        type *stats = container_of(kobj, type, kobj_stats);             \
        int ret;                                                        \
                                                                        \
        if (sysfs_streq(buf, "1"))                                      \
                ret = reset(stats, true);                               \
        else if (sysfs_streq(buf, "0"))                                 \
                ret = reset(stats, false);                              \
        else                                                            \
                ret = -EINVAL;                                          \
                                                                        \
        if (!ret)                                                       \
                return count;                                           \
                                                                        \
        return ret;                                                     \
}

/*
 * STAT_SHOW_FUNC() - define the sysfs show handler <get_value>_show().
 *
 * The handler finds the @type object that embeds the kobject as kobj_stats
 * and returns whatever @print yields for that object and the output page.
 */
#define STAT_SHOW_FUNC(type, get_value, print)                          \
static ssize_t get_value##_show(struct kobject *kobj,                   \
                                struct kobj_attribute *attr,            \
                                char *page)                             \
{                                                                       \
        type *stats = container_of(kobj, type, kobj_stats);             \
        ssize_t len = print(stats, page);                               \
                                                                        \
        return len;                                                     \
}

/*
 * STAT_ATTR() - define <stat>_show(), <stat>_store() and the read-write
 * kobj_attribute <stat>_attr that refers to them.
 */
#define STAT_ATTR(type, stat, print, reset)                             \
STAT_SHOW_FUNC(type, stat, print)                                       \
STAT_STORE_FUNC(type, stat, reset)                                      \
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)

#endif /* RTRS_PRI_H */