#include <sys/param.h>
#include <sys/dnv.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <machine/bus.h>
#include <machine/bus_dma.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_private.h>
#define NVMFT_NC(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define NVMFT_QP(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])
static void nvmft_done(union ctl_io *io);
static int nvmft_init(void);
static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
int flag, struct thread *td);
static int nvmft_shutdown(void);
static struct taskqueue *nvmft_taskq;
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;
MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");
/*
 * CTL frontend registration record for the NVMe over Fabrics target.
 * init/ioctl/shutdown are invoked by the CTL frontend framework;
 * fe_dump is intentionally unimplemented.
 */
static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};
/*
 * CTL port callback: mark this port as online so new associations are
 * accepted by nvmft_handoff().
 */
static void
nvmft_online(void *arg)
{
	struct nvmft_port *nport = arg;

	mtx_lock(&nport->lock);
	nport->online = true;
	mtx_unlock(&nport->lock);
}
/*
 * CTL port callback: stop accepting new associations and shut down all
 * existing controllers, then wait until they have all detached.
 * NOTE(review): the wakeup matching the mtx_sleep() on np is presumably
 * issued when a controller removes itself from np->controllers --
 * confirm against the controller teardown path.
 */
static void
nvmft_offline(void *arg)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;

	mtx_lock(&np->lock);
	np->online = false;

	/* Ask every active controller to tear itself down. */
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_printf(ctrlr,
		    "shutting down due to port going offline\n");
		nvmft_controller_error(ctrlr, NULL, ENODEV);
	}

	/* Block until the controller list drains. */
	while (!TAILQ_EMPTY(&np->controllers))
		mtx_sleep(np, &np->lock, 0, "nvmfoff", 0);
	mtx_unlock(&np->lock);
}
/*
 * CTL port callback: emit an XML fragment describing this port and its
 * connected hosts into the supplied sbuf.  Returns the first non-zero
 * sbuf_printf() result, or 0 on success.
 */
static int
nvmft_info(void *arg, struct sbuf *sb)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	int rv;

	mtx_lock(&np->lock);
	rv = sbuf_printf(sb, "\t<port>%s,p,%u</port>\n", np->cdata.subnqn,
	    np->portid);
	if (rv == 0) {
		/* One <host> element per connected controller. */
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			rv = sbuf_printf(sb,
			    "\t<host id=\"%u\">%s</host>\n",
			    ctrlr->cntlid, ctrlr->hostnqn);
			if (rv != 0)
				break;
		}
	}
	mtx_unlock(&np->lock);
	return (rv);
}
/*
 * CTL LUN-enable callback: insert the namespace ID for lun_id
 * (nsid = lun_id + 1) into the sorted active_ns[] array and notify all
 * controllers of the namespace change.  Returns EOPNOTSUPP for LUN IDs
 * beyond the advertised namespace count and EINVAL for duplicates.
 */
static int
nvmft_lun_enable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t *old_ns, *new_ns;
	uint32_t nsid;
	u_int i, new_count;

	/* LUN IDs at or beyond cdata.nn cannot be mapped to a valid NSID. */
	if (lun_id >= le32toh(np->cdata.nn)) {
		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
		return (EOPNOTSUPP);
	}
	nsid = lun_id + 1;

	mtx_lock(&np->lock);
	for (;;) {
		/*
		 * Size the replacement array under the lock, drop the
		 * lock for the M_WAITOK allocation, then re-check: if
		 * another thread grew the list while the lock was
		 * dropped, free the too-small array and retry.
		 */
		new_count = np->num_ns + 1;
		mtx_unlock(&np->lock);
		new_ns = mallocarray(new_count, sizeof(*new_ns), M_NVMFT,
		    M_WAITOK);
		mtx_lock(&np->lock);
		if (np->num_ns + 1 <= new_count)
			break;
		free(new_ns, M_NVMFT);
	}

	/* Find the sorted insertion point, rejecting duplicates. */
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] < nsid)
			continue;
		if (np->active_ns[i] == nsid) {
			mtx_unlock(&np->lock);
			free(new_ns, M_NVMFT);
			printf("NVMFT: %s duplicate lun %d\n",
			    np->cdata.subnqn, lun_id);
			return (EINVAL);
		}
		break;
	}

	/* Copy prefix, insert the new nsid, copy the suffix. */
	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));
	new_ns[i] = nsid;
	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
	    sizeof(*np->active_ns));
	np->num_ns++;
	old_ns = np->active_ns;
	np->active_ns = new_ns;

	/* Tell each controller about the namespace change. */
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}
	mtx_unlock(&np->lock);
	/* The old array is freed after the lock is dropped. */
	free(old_ns, M_NVMFT);

	return (0);
}
/*
 * CTL LUN-disable callback: remove the namespace ID for lun_id from the
 * sorted active_ns[] array and notify all controllers.  Unmappable LUN
 * IDs are silently accepted; a missing entry is an error.
 */
static int
nvmft_lun_disable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t nsid;
	u_int idx;

	if (lun_id >= le32toh(np->cdata.nn))
		return (0);

	nsid = lun_id + 1;
	mtx_lock(&np->lock);

	/* Locate the matching entry in the active namespace array. */
	idx = 0;
	while (idx < np->num_ns && np->active_ns[idx] != nsid)
		idx++;
	if (idx == np->num_ns) {
		mtx_unlock(&np->lock);
		printf("NVMFT: %s request to disable nonexistent lun %d\n",
		    np->cdata.subnqn, lun_id);
		return (EINVAL);
	}

	/* Close the gap in place; the array stays sorted. */
	memmove(np->active_ns + idx, np->active_ns + idx + 1,
	    (np->num_ns - (idx + 1)) * sizeof(*np->active_ns));
	np->num_ns--;

	/* Tell each controller about the namespace change. */
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}
	mtx_unlock(&np->lock);

	return (0);
}
/*
 * Fill an Active Namespace ID list with NSIDs greater than the supplied
 * nsid, in ascending order (active_ns[] is kept sorted), stopping when
 * the list is full.  Entries are stored little-endian per the wire
 * format.
 */
void
nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
    struct nvme_ns_list *nslist)
{
	u_int idx, filled;

	mtx_lock(&np->lock);
	filled = 0;
	for (idx = 0; idx < np->num_ns && filled < nitems(nslist->ns);
	    idx++) {
		if (np->active_ns[idx] > nsid)
			nslist->ns[filled++] = htole32(np->active_ns[idx]);
	}
	mtx_unlock(&np->lock);
}
/*
 * Translate an incoming NVMe command capsule into a CTL I/O and submit
 * it via ctl_run().  The CTL LUN is nsid - 1; NSID 0 is rejected up
 * front.  On a ctl_run() failure the I/O is completed with an internal
 * error (which frees the capsule) and the association is torn down.
 */
void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	/* NSID 0 is never a valid target. */
	if (cmd->nsid == htole32(0)) {
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
		nvmf_free_capsule(nc);
		return;
	}

	/* Busy-time accounting starts with the first pending command. */
	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);

	/* Build the CTL I/O; capsule and qpair ride in the private ptrs. */
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_NC(io) = nc;
	NVMFT_QP(io) = qp;
	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
	io->nvmeio.cmd = *cmd;
	error = ctl_run(io);
	if (error != 0) {
		/*
		 * Complete with an internal error (nvmft_done() sends
		 * the response and frees the capsule and I/O), then
		 * fail the association.
		 */
		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
		    nvmft_qpair_name(qp), error);
		ctl_nvme_set_generic_error(&io->nvmeio,
		    NVME_SC_INTERNAL_DEVICE_ERROR);
		nvmft_done(io);

		nvmft_controller_error(ctrlr, qp, ENXIO);
	}
}
/*
 * Abort all outstanding CTL I/Os for a controller by submitting an
 * I_T nexus reset task.  The task is accounted as a pending command so
 * nvmft_done() keeps the busy-time bookkeeping balanced; NVMFT_NC(io)
 * is left NULL (from ctl_zero_io) so nvmft_done() skips the capsule and
 * response handling for this I/O.
 */
void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);

	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_QP(io) = ctrlr->admin;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE;
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
		/* Satisfy nvmft_done()'s status KASSERT on this error path. */
#ifdef INVARIANTS
		io->io_hdr.status = CTL_SUCCESS;
#endif
		nvmft_done(io);
	}
}
/*
 * Transport completion callback for a host->controller data transfer
 * started by nvmft_datamove_out().  Records success or a transfer
 * error, releases the temporary vlist (if any), and finishes the CTL
 * datamove.
 */
static void
nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
{
	struct ctl_nvmeio *ctnio = arg;

	if (error == 0) {
		MPASS(xfered == ctnio->kern_data_len);
		ctnio->kern_data_resid -= xfered;
	} else
		ctl_nvme_set_data_transfer_error(ctnio);

	/* Free the bus_dma_segment array built for the S/G case. */
	if (ctnio->kern_sg_entries != 0) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);

	ctl_datamove_done((union ctl_io *)ctnio, false);
}
/*
 * Host->controller (write) data phase: wrap the CTL data buffer in a
 * memory descriptor and ask the transport to receive the capsule data
 * into it.  Completion is reported asynchronously through
 * nvmft_datamove_out_cb(); on a synchronous failure this function
 * performs the same cleanup the callback would have.
 */
static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct memdesc mem;
	int error;

	MPASS(ctnio->ext_data_ptr == NULL);

	if (ctnio->kern_sg_entries > 0) {
		struct ctl_sg_entry *sgl;
		struct bus_dma_segment *vlist;

		/*
		 * Convert the CTL S/G list to a bus_dma_segment vlist;
		 * stash the array in ext_data_ptr so the callback can
		 * free it.
		 */
		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
		    M_NVMFT, M_WAITOK);
		ctnio->ext_data_ptr = (void *)vlist;
		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
			vlist[i].ds_len = sgl[i].len;
		}
		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
	} else
		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
	if (error == 0)
		return;

	/* Synchronous failure: mirror the callback's error cleanup. */
	nvmft_printf(nvmft_qpair_ctrlr(qp),
	    "Failed to request capsule data: %d\n", error);
	ctl_nvme_set_data_transfer_error(ctnio);

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);

	ctl_datamove_done((union ctl_io *)ctnio, true);
}
/*
 * Copy the CTL data buffer (flat buffer or scatter/gather list) into a
 * newly allocated mbuf chain sized for kern_data_len.  Used for the
 * controller->host data phase when no zero-copy reference callback is
 * available.
 */
static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;
	uint32_t resid, off, todo;
	int mlen;

	MPASS(ctnio->kern_data_len != 0);

	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

	/* Flat buffer: a single m_copyback fills the chain. */
	if (ctnio->kern_sg_entries == 0) {
		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
		return (m0);
	}

	/*
	 * Walk the S/G list and the mbuf chain in lock step, copying
	 * min(space left in mbuf, bytes left in segment) each pass.
	 */
	resid = ctnio->kern_data_len;
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	off = 0;
	m = m0;
	mlen = M_TRAILINGSPACE(m);
	for (;;) {
		todo = MIN(mlen, sgl->len - off);
		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
		    todo);
		m->m_len += todo;
		resid -= todo;
		if (resid == 0) {
			MPASS(m->m_next == NULL);
			break;
		}

		off += todo;
		if (off == sgl->len) {
			/* Segment exhausted: advance the S/G cursor. */
			sgl++;
			off = 0;
		}

		mlen -= todo;
		if (mlen == 0) {
			/* Mbuf full: advance to the next in the chain. */
			m = m->m_next;
			mlen = M_TRAILINGSPACE(m);
		}
	}
	return (m0);
}
static void
m_free_ref_data(struct mbuf *m)
{
ctl_ref kern_data_ref = m->m_ext.ext_arg1;
kern_data_ref(m->m_ext.ext_arg2, -1);
}
/*
 * Wrap a CTL-owned buffer in an external mbuf without copying, taking a
 * reference via the I/O's kern_data_ref callback.  The reference is
 * released by m_free_ref_data() when the mbuf is freed.
 */
static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
	struct mbuf *mb;

	mb = m_get(M_WAITOK, MT_DATA);
	m_extadd(mb, buf, size, m_free_ref_data, ctnio->kern_data_ref,
	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
	mb->m_len = size;
	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
	return (mb);
}
/*
 * Build a zero-copy mbuf chain over the CTL data buffer (flat buffer or
 * scatter/gather list) using reference-counted external mbufs.
 */
static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *head, *tail;
	u_int i;

	MPASS(ctnio->kern_data_len != 0);

	if (ctnio->kern_sg_entries == 0)
		return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
		    ctnio->kern_data_len));

	/* One external mbuf per S/G segment, linked in order. */
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	head = tail = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
	for (i = 1; i < ctnio->kern_sg_entries; i++) {
		tail->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
		tail = tail->m_next;
	}
	return (head);
}
/*
 * Controller->host (read) data phase: wrap the CTL buffer in an mbuf
 * chain (zero-copy when a reference callback is available, otherwise a
 * copy) and hand it to the transport.
 *
 * Fix: the NVMF_SUCCESS_SENT case previously fell through into the
 * NVMF_MORE/NVME_SC_SUCCESS cases with no break.  That was harmless
 * only because those cases were empty; add the missing break so the
 * case is self-contained and robust against future edits.
 */
static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct mbuf *m;
	u_int status;

	if (ctnio->kern_data_ref != NULL)
		m = nvmft_ref_data(ctnio);
	else
		m = nvmft_copy_data(ctnio);
	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
	    ctnio->kern_data_len);
	switch (status) {
	case NVMF_SUCCESS_SENT:
		/*
		 * The transport sent the success completion along with
		 * the data; record that and retire the command so
		 * nvmft_done() does not send a second response.
		 */
		ctnio->success_sent = true;
		nvmft_command_completed(qp, nc);
		break;
	case NVMF_MORE:
	case NVME_SC_SUCCESS:
		break;
	default:
		/* Any other value is an NVMe status code to report. */
		ctl_nvme_set_generic_error(ctnio, status);
		break;
	}
	ctl_datamove_done((union ctl_io *)ctnio, true);
}
/*
 * Dispatch a CTL datamove request to the read (data-in) or write
 * (data-out) handler based on the I/O's data direction flags.
 */
void
nvmft_handle_datamove(union ctl_io *io)
{
	struct nvmf_capsule *nc;
	struct nvmft_qpair *qp;

	MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
	    io->io_hdr.status == CTL_SUCCESS);
	MPASS(!io->nvmeio.success_sent);

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);
	switch (io->io_hdr.flags & CTL_FLAG_DATA_MASK) {
	case CTL_FLAG_DATA_IN:
		nvmft_datamove_in(&io->nvmeio, qp, nc);
		break;
	default:
		nvmft_datamove_out(&io->nvmeio, qp, nc);
		break;
	}
}
/*
 * Fail a queued datamove: mark the I/O aborted with a non-zero port
 * status and complete the datamove immediately.
 */
void
nvmft_abort_datamove(union ctl_io *io)
{
	io->io_hdr.port_status = 1;
	io->io_hdr.flags |= CTL_FLAG_ABORT;
	ctl_datamove_done(io, true);
}
/*
 * CTL fe_datamove hook: hand the I/O to its owning queue pair, which
 * schedules the actual transfer.
 */
static void
nvmft_datamove(union ctl_io *io)
{
	nvmft_qpair_datamove(NVMFT_QP(io), io);
}
/* Queue a task on the shared nvmft taskqueue. */
void
nvmft_enqueue_task(struct task *task)
{
	taskqueue_enqueue(nvmft_taskq, task);
}
/* Wait for a task on the shared nvmft taskqueue to finish running. */
void
nvmft_drain_task(struct task *task)
{
	taskqueue_drain(nvmft_taskq, task);
}
/*
 * Add "addend" to a 128-bit little-endian health log counter stored as
 * two 64-bit halves, carrying into the high half on wrap of the low
 * half.  NOTE(review): the carry is "pair[1] += htole64(1)", which only
 * behaves as a little-endian increment on little-endian hosts; confirm
 * whether big-endian correctness matters here.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] += htole64(1);
}
/*
 * CTL fe_done hook: update health log statistics for I/O commands, send
 * the NVMe completion (unless the transport already sent it with the
 * data), and release the capsule and CTL I/O.  A NULL capsule
 * identifies the task I/O submitted by nvmft_terminate_commands(),
 * which only needs the accounting at the end.
 */
static void
nvmft_done(union ctl_io *io)
{
	struct nvmft_controller *ctrlr;
	const struct nvme_command *cmd;
	struct nvmft_qpair *qp;
	struct nvmf_capsule *nc;
	size_t len;

	KASSERT(io->io_hdr.status == CTL_SUCCESS ||
	    io->io_hdr.status == CTL_NVME_ERROR,
	    ("%s: bad status %u", __func__, io->io_hdr.status));

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);
	ctrlr = nvmft_qpair_ctrlr(qp);

	if (nc == NULL) {
		/* Completion of the I_T nexus reset task I/O. */
		goto end;
	}

	cmd = nvmf_capsule_sqe(nc);

	/*
	 * Count the transfer in 512-byte blocks for the health log;
	 * failed commands contribute nothing.
	 */
	if (io->io_hdr.status == CTL_SUCCESS)
		len = nvmf_capsule_data_len(nc) / 512;
	else
		len = 0;

	switch (cmd->opc) {
	case NVME_OPC_WRITE:
		mtx_lock(&ctrlr->lock);
		hip_add(ctrlr->hip.host_write_commands, 1);
		/* Data units accumulate in chunks of 1000 blocks. */
		len += ctrlr->partial_duw;
		if (len > 1000)
			hip_add(ctrlr->hip.data_units_written, len / 1000);
		ctrlr->partial_duw = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	case NVME_OPC_READ:
	case NVME_OPC_COMPARE:
	case NVME_OPC_VERIFY:
		mtx_lock(&ctrlr->lock);
		/* VERIFY is excluded from the host read command count. */
		if (cmd->opc != NVME_OPC_VERIFY)
			hip_add(ctrlr->hip.host_read_commands, 1);
		len += ctrlr->partial_dur;
		if (len > 1000)
			hip_add(ctrlr->hip.data_units_read, len / 1000);
		ctrlr->partial_dur = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	}

	if (io->nvmeio.success_sent) {
		/* The transport already sent the completion with the data. */
		MPASS(io->io_hdr.status == CTL_SUCCESS);
	} else {
		io->nvmeio.cpl.cid = cmd->cid;
		nvmft_send_response(qp, &io->nvmeio.cpl);
	}
	nvmf_free_capsule(nc);
end:
	ctl_free_io(io);

	/* Balance the pending-command accounting from dispatch. */
	mtx_lock(&ctrlr->lock);
	ctrlr->pending_commands--;
	if (ctrlr->pending_commands == 0)
		ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
	mtx_unlock(&ctrlr->lock);
}
/*
 * Frontend init hook: create the per-frontend taskqueue (threads run in
 * the CTL kernel process) and set up the global port list and lock.
 */
static int
nvmft_init(void)
{
	int rv;

	nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
	    taskqueue_thread_enqueue, &nvmft_taskq);
	rv = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
	    control_softc->ctl_proc, "nvmft");
	if (rv != 0) {
		taskqueue_free(nvmft_taskq);
		return (rv);
	}

	TAILQ_INIT(&nvmft_ports);
	sx_init(&nvmft_ports_lock, "nvmft ports");
	return (0);
}
/*
 * Release a port once its last reference is gone: deregister the CTL
 * port if it was registered, then free all per-port resources.
 */
void
nvmft_port_free(struct nvmft_port *np)
{
	KASSERT(TAILQ_EMPTY(&np->controllers),
	    ("%s(%p): active controllers", __func__, np));

	/* targ_port == -1 means ctl_port_register() never succeeded. */
	if (np->port.targ_port != -1 &&
	    ctl_port_deregister(&np->port) != 0)
		printf("%s: ctl_port_deregister() failed\n", __func__);

	free(np->active_ns, M_NVMFT);
	clean_unrhdr(np->ids);
	delete_unrhdr(np->ids);
	mtx_destroy(&np->lock);
	free(np, M_NVMFT);
}
/*
 * Look up a port by subsystem NQN.  Returns NULL when no port matches.
 * The caller must hold nvmft_ports_lock.
 */
static struct nvmft_port *
nvmft_port_find(const char *subnqn)
{
	struct nvmft_port *np;

	KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));
	sx_assert(&nvmft_ports_lock, SA_LOCKED);

	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (strcmp(np->cdata.subnqn, subnqn) == 0)
			return (np);
	}
	return (NULL);
}
/*
 * Look up a port by its CTL target port number.  Returns NULL when no
 * port matches.  The caller must hold nvmft_ports_lock.
 */
static struct nvmft_port *
nvmft_port_find_by_id(int port_id)
{
	struct nvmft_port *np;

	sx_assert(&nvmft_ports_lock, SA_LOCKED);

	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (np->port.targ_port == port_id)
			return (np);
	}
	return (NULL);
}
/*
 * Parse an nvlist string value as an unsigned number (any base strtoul
 * accepts).  A missing key yields default_value; an empty string or
 * trailing garbage is rejected.  Returns true on success.
 */
static bool
dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
    u_long *value)
{
	const char *str;
	char *end;

	str = dnvlist_get_string(nvl, name, NULL);
	if (str == NULL) {
		*value = default_value;
		return (true);
	}

	if (str[0] == '\0')
		return (false);
	*value = strtoul(str, &end, 0);
	return (*end == '\0');
}
/*
 * Handle CTL port creation for the nvmf frontend: validate the nvlist
 * arguments, build the controller identify data advertised to hosts,
 * and register a new CTL port.  Errors are reported via req->status and
 * req->error_str.
 */
static void
nvmft_port_create(struct ctl_req *req)
{
	struct nvmft_port *np;
	struct ctl_port *port;
	const char *serial, *subnqn;
	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
	int error;

	/* "subnqn" and "portid" are required. */
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Missing required argument");
		return;
	}

	if (!nvmf_nqn_valid(subnqn)) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid SubNQN");
		return;
	}

	/* Port ID must fit in 16 bits. */
	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
	    portid > UINT16_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid port ID");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid maximum I/O queue size");
		return;
	}

	/* Enable timeout is in milliseconds, stored in 500ms units. */
	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid enable timeout");
		return;
	}

	/* Capsule sizes must be 16-byte multiples that fit an SQE/CQE. */
	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
	    (ioccsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Command Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
	    (iorcsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Response Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
	    nn < 1 || nn > UINT32_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid number of namespaces");
		return;
	}

	/* Default serial number is derived from the host ID. */
	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
	if (serial == NULL) {
		getcredhostid(curthread->td_ucred, &hostid);
		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
		serial = serial_buf;
	}

	sx_xlock(&nvmft_ports_lock);

	/* SubNQNs must be unique across ports. */
	np = nvmft_port_find(subnqn);
	if (np != NULL) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "SubNQN \"%s\" already exists", subnqn);
		sx_xunlock(&nvmft_ports_lock);
		return;
	}

	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
	refcount_init(&np->refs, 1);
	np->portid = portid;
	np->max_io_qsize = max_io_qsize;
	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
	mtx_init(&np->lock, "nvmft port", NULL, MTX_DEF);
	/* Controller ID allocator, bounded by CTL and static cntlid limits. */
	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
	TAILQ_INIT(&np->controllers);

	/* Identify-controller data reported to hosts. */
	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
	np->cdata.aerl = NVMFT_NUM_AER - 1;
	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

	/* Firmware log page mirrors the identify data's revision. */
	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

	/* Fill in and register the CTL port. */
	port = &np->port;
	port->frontend = &nvmft_frontend;
	port->port_type = CTL_PORT_NVMF;
	port->num_requested_ctl_io = max_io_qsize;
	port->port_name = "nvmf";
	port->physical_port = portid;
	port->virtual_port = 0;
	port->port_online = nvmft_online;
	port->port_offline = nvmft_offline;
	port->port_info = nvmft_info;
	port->onoff_arg = np;
	port->lun_enable = nvmft_lun_enable;
	port->lun_disable = nvmft_lun_disable;
	port->targ_lun_arg = np;
	port->fe_datamove = nvmft_datamove;
	port->fe_done = nvmft_done;
	port->targ_port = -1;
	port->options = nvlist_clone(req->args_nvl);

	error = ctl_port_register(port);
	if (error != 0) {
		sx_xunlock(&nvmft_ports_lock);
		nvlist_destroy(port->options);
		/* Drops the initial reference, freeing np. */
		nvmft_port_rele(np);
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Failed to register CTL port with error %d", error);
		return;
	}

	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	/* Report the assigned CTL port number back to userland. */
	req->status = CTL_LUN_OK;
	req->result_nvl = nvlist_create(0);
	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}
/*
 * Handle CTL port removal: locate the port by SubNQN or CTL port ID
 * (exactly one must be given), unlink it from the global list, take it
 * offline if needed, and drop the list's reference.
 */
static void
nvmft_port_remove(struct ctl_req *req)
{
	struct nvmft_port *np;
	const char *subnqn;
	u_long port_id;

	port_id = ULONG_MAX;
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL) {
		/* Without a SubNQN, a port_id is required. */
		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Missing required argument");
			return;
		}
		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
		    &port_id)) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Invalid CTL port ID");
			return;
		}
	} else {
		/* Supplying both selectors is ambiguous. */
		if (nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Ambiguous port removal request");
			return;
		}
	}

	sx_xlock(&nvmft_ports_lock);

	if (subnqn != NULL) {
		np = nvmft_port_find(subnqn);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "SubNQN \"%s\" does not exist", subnqn);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	} else {
		np = nvmft_port_find_by_id(port_id);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "CTL port %lu is not a NVMF port", port_id);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	}

	TAILQ_REMOVE(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	/* Offline the port (tearing down controllers) if it is online. */
	mtx_lock(&np->lock);
	if (np->online) {
		mtx_unlock(&np->lock);
		ctl_port_offline(&np->port);
	} else
		mtx_unlock(&np->lock);

	/* Drop the global list's reference. */
	nvmft_port_rele(np);
	req->status = CTL_LUN_OK;
}
/*
 * Handle CTL_NVMF_HANDOFF: accept a freshly-connected queue pair from
 * userland.  The packed nvlist carries the transport type, queue pair
 * parameters, and the CONNECT SQE and data, which identify the target
 * SubNQN.  The port reference taken on success paths is released at
 * "out" (np stays NULL on paths that must not release one).
 */
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
	const struct nvmf_fabric_connect_cmd *cmd;
	const struct nvmf_fabric_connect_data *data;
	const nvlist_t *params;
	struct nvmft_port *np;
	nvlist_t *nvl;
	size_t len;
	enum nvmf_trtype trtype;
	int error;

	np = NULL;
	error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl);
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin and unpack handoff arguments");
		return;
	}

	/* All four keys are mandatory. */
	if (!nvlist_exists_number(nvl, "trtype") ||
	    !nvlist_exists_nvlist(nvl, "params") ||
	    !nvlist_exists_binary(nvl, "cmd") ||
	    !nvlist_exists_binary(nvl, "data")) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Handoff arguments missing required value");
		goto out;
	}

	params = nvlist_get_nvlist(nvl, "params");
	if (!nvmf_validate_qpair_nvlist(params, true)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid queue pair parameters");
		goto out;
	}

	/* The CONNECT SQE and data must be exactly the right size. */
	cmd = nvlist_get_binary(nvl, "cmd", &len);
	if (len != sizeof(*cmd)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Wrong size for CONNECT SQE");
		goto out;
	}

	data = nvlist_get_binary(nvl, "data", &len);
	if (len != sizeof(*data)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Wrong size for CONNECT data");
		goto out;
	}

	if (!nvmf_nqn_valid(data->subnqn)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid SubNQN");
		goto out;
	}

	sx_slock(&nvmft_ports_lock);
	np = nvmft_port_find(data->subnqn);
	if (np == NULL) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Unknown SubNQN");
		goto out;
	}
	if (!np->online) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "CTL port offline");
		/* No reference was taken; avoid the release at "out". */
		np = NULL;
		goto out;
	}
	/* Hold the port across the handoff, then drop the list lock. */
	nvmft_port_ref(np);
	sx_sunlock(&nvmft_ports_lock);

	trtype = nvlist_get_number(nvl, "trtype");
	if (nvlist_get_bool(params, "admin")) {
		error = nvmft_handoff_admin_queue(np, trtype, params, cmd,
		    data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff admin queue: %d", error);
			goto out;
		}
	} else {
		error = nvmft_handoff_io_queue(np, trtype, params, cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff I/O queue: %d", error);
			goto out;
		}
	}

	cn->status = CTL_NVMF_OK;
out:
	if (np != NULL)
		nvmft_port_rele(np);
	nvlist_destroy(nvl);
}
/*
 * Handle CTL_NVMF_LIST: render all active associations as XML into a
 * fixed-size sbuf and copy it out to the caller's buffer.  A full sbuf
 * (sbuf_finish failure) is reported as "need more space" so userland
 * can retry with a larger buffer.
 */
static void
nvmft_list(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_list_params *lp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	struct sbuf *sb;
	int error;

	lp = &cn->data.list;

	/* Fixed-length sbuf sized to the caller's buffer, NUL included. */
	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
	    SBUF_INCLUDENUL);
	if (sb == NULL) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to allocate NVMeoF session list");
		return;
	}

	sbuf_printf(sb, "<ctlnvmflist>\n");
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		mtx_lock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			sbuf_printf(sb, "<connection id=\"%d\">"
			    "<hostnqn>%s</hostnqn>"
			    "<subnqn>%s</subnqn>"
			    "<trtype>%u</trtype>"
			    "</connection>\n",
			    ctrlr->cntlid,
			    ctrlr->hostnqn,
			    np->cdata.subnqn,
			    ctrlr->trtype);
		}
		mtx_unlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);
	sbuf_printf(sb, "</ctlnvmflist>\n");

	/* sbuf_finish() fails if the fixed buffer overflowed. */
	if (sbuf_finish(sb) != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Out of space, %d bytes is too small", lp->alloc_len);
		return;
	}

	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
	if (error != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyout session list: %d", error);
		return;
	}
	lp->fill_len = sbuf_len(sb);
	cn->status = CTL_NVMF_OK;
	sbuf_delete(sb);
}
/*
 * Handle CTL_NVMF_TERMINATE: force-disconnect all controllers matching
 * the supplied criteria (wildcard, controller ID, or host NQN).
 * Reports an error if nothing matched.
 */
static void
nvmft_terminate(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_terminate_params *tp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	bool found, match;

	tp = &cn->data.terminate;

	found = false;
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		mtx_lock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			/* Selector precedence: all, then cntlid, then NQN. */
			if (tp->all != 0)
				match = true;
			else if (tp->cntlid != -1)
				match = tp->cntlid == ctrlr->cntlid;
			else if (tp->hostnqn[0] != '\0')
				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
				    sizeof(tp->hostnqn)) == 0;
			else
				match = false;
			if (!match)
				continue;
			nvmft_printf(ctrlr,
			    "disconnecting due to administrative request\n");
			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
			found = true;
		}
		mtx_unlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);

	if (!found) {
		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "No matching associations found");
		return;
	}
	cn->status = CTL_NVMF_OK;
}
/*
 * Character-device ioctl entry point for the nvmf frontend.  Dispatches
 * generic CTL port requests (create/remove) and NVMeoF-specific
 * requests (queue handoff, session listing, association termination).
 * Errors for recognized commands are reported in the request structure;
 * only unknown ioctl commands return ENOTTY.
 */
static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ctl_nvmf *cn;
	struct ctl_req *req;

	switch (cmd) {
	case CTL_PORT_REQ:
		req = (struct ctl_req *)data;
		switch (req->reqtype) {
		case CTL_REQ_CREATE:
			nvmft_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			nvmft_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
			break;
		}
		return (0);
	case CTL_NVMF:
		cn = (struct ctl_nvmf *)data;
		switch (cn->type) {
		case CTL_NVMF_HANDOFF:
			nvmft_handoff(cn);
			break;
		case CTL_NVMF_LIST:
			nvmft_list(cn);
			break;
		case CTL_NVMF_TERMINATE:
			nvmft_terminate(cn);
			break;
		default:
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Invalid NVMeoF request type %d", cn->type);
			break;
		}
		return (0);
	default:
		return (ENOTTY);
	}
}
/*
 * Frontend shutdown hook: refuse to unload while any port still exists,
 * otherwise release the taskqueue and the port list lock.
 */
static int
nvmft_shutdown(void)
{
	if (!TAILQ_EMPTY(&nvmft_ports))
		return (EBUSY);

	taskqueue_free(nvmft_taskq);
	sx_destroy(&nvmft_ports_lock);
	return (0);
}
CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);