#include "nvme_internal.h"
/*
 * Return the controller ready timeout in milliseconds: the TO field of
 * the CAP register multiplied by NVME_READY_TIMEOUT_UNIT (500).
 */
static inline unsigned int
nvme_ctrlr_get_ready_to_in_ms(struct nvme_ctrlr *ctrlr)
{
#define NVME_READY_TIMEOUT_UNIT 500
	union nvme_cap_register cap;
	unsigned int timeout_ms;

	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
	timeout_ms = NVME_READY_TIMEOUT_UNIT * cap.bits.to;

	return timeout_ms;
}
/*
 * Create an I/O queue pair on the controller: the completion queue is
 * created first, then the submission queue. If the submission queue
 * cannot be created, the completion queue is deleted again. On success
 * the queue pair is reset.
 *
 * Returns 0 on success, or the error returned by the failed admin
 * command.
 */
static int nvme_ctrlr_create_qpair(struct nvme_ctrlr *ctrlr,
				   struct nvme_qpair *qpair)
{
	int err;

	err = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
	if (err) {
		nvme_notice("Create completion queue %u failed\n",
			    qpair->id);
		return err;
	}

	err = nvme_admin_create_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
	if (!err) {
		nvme_qpair_reset(qpair);
		return 0;
	}

	/* Undo the completion queue creation */
	nvme_notice("Create submission queue %u failed\n",
		    qpair->id);
	nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);

	return err;
}
/*
 * Delete an I/O queue pair on the controller: submission queue first,
 * then completion queue. Stops at the first failure.
 *
 * Returns 0 on success, or the error returned by the failed admin
 * command.
 */
static int nvme_ctrlr_delete_qpair(struct nvme_ctrlr *ctrlr,
				   struct nvme_qpair *qpair)
{
	int rc;

	rc = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_SUBMISSION_QUEUE);
	if (rc != 0) {
		nvme_notice("Delete submission queue %u failed\n",
			    qpair->id);
		return rc;
	}

	rc = nvme_admin_delete_ioq(ctrlr, qpair, NVME_IO_COMPLETION_QUEUE);
	if (rc != 0)
		nvme_notice("Delete completion queue %u failed\n",
			    qpair->id);

	return rc;
}
/*
 * Mark the Intel vendor specific log pages as supported, based on the
 * lengths reported in the log page directory and on the controller
 * quirks. Nothing is done if the controller vendor ID is not Intel or
 * if no directory is provided.
 */
static void
nvme_ctrlr_construct_intel_support_log_page_list(struct nvme_ctrlr *ctrlr,
				struct nvme_intel_log_page_dir *log_page_dir)
{
	if (log_page_dir == NULL ||
	    ctrlr->cdata.vid != NVME_PCI_VID_INTEL)
		return;

	ctrlr->log_page_supported[NVME_INTEL_LOG_PAGE_DIR] = true;

	/* Latency logs: reported by the directory or forced by a quirk */
	if ((ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY) ||
	    log_page_dir->read_latency_log_len) {
		ctrlr->log_page_supported[NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
	}

	if ((ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY) ||
	    log_page_dir->write_latency_log_len) {
		ctrlr->log_page_supported[NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
	}

	/* Remaining pages depend only on the directory content */
	if (log_page_dir->temperature_statistics_log_len) {
		ctrlr->log_page_supported[NVME_INTEL_LOG_TEMPERATURE] = true;
	}

	if (log_page_dir->smart_log_len) {
		ctrlr->log_page_supported[NVME_INTEL_LOG_SMART] = true;
	}

	if (log_page_dir->marketing_description_log_len) {
		ctrlr->log_page_supported[NVME_INTEL_MARKETING_DESCRIPTION] = true;
	}
}
/*
 * Read the Intel log page directory from the controller and use it to
 * fill in the list of supported Intel log pages.
 *
 * Returns 0 on success, ENOMEM if the directory buffer cannot be
 * allocated, or the error returned by the get log page command.
 */
static int nvme_ctrlr_set_intel_support_log_pages(struct nvme_ctrlr *ctrlr)
{
	struct nvme_intel_log_page_dir *dir;
	int err;

	dir = nvme_zmalloc(sizeof(*dir), 64);
	if (dir == NULL) {
		nvme_err("Allocate log_page_directory failed\n");
		return ENOMEM;
	}

	err = nvme_admin_get_log_page(ctrlr, NVME_INTEL_LOG_PAGE_DIR,
				      NVME_GLOBAL_NS_TAG,
				      dir, sizeof(*dir));
	if (err == 0)
		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, dir);
	else
		nvme_notice("Get NVME_INTEL_LOG_PAGE_DIR log page failed\n");

	nvme_free(dir);

	return err;
}
/*
 * Rebuild the controller list of supported log pages from scratch.
 * The error, health information and firmware slot pages are always
 * flagged; the command effects log depends on the identify data, and
 * Intel controllers additionally get their vendor specific pages.
 */
static void nvme_ctrlr_set_supported_log_pages(struct nvme_ctrlr *ctrlr)
{
	memset(ctrlr->log_page_supported, 0,
	       sizeof(ctrlr->log_page_supported));

	ctrlr->log_page_supported[NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[NVME_LOG_FIRMWARE_SLOT] = true;

	if (ctrlr->cdata.lpa.celp) {
		ctrlr->log_page_supported[NVME_LOG_COMMAND_EFFECTS_LOG] = true;
	}

	/* The result is ignored: failing to get Intel pages is not fatal */
	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
	}
}
/*
 * Flag all Intel vendor specific features as supported.
 */
static void nvme_ctrlr_set_intel_supported_features(struct nvme_ctrlr *ctrlr)
{
	static const unsigned int intel_features[] = {
		NVME_INTEL_FEAT_MAX_LBA,
		NVME_INTEL_FEAT_NATIVE_MAX_LBA,
		NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING,
		NVME_INTEL_FEAT_SMBUS_ADDRESS,
		NVME_INTEL_FEAT_LED_PATTERN,
		NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS,
		NVME_INTEL_FEAT_LATENCY_TRACKING,
	};
	unsigned int k;

	for (k = 0; k < sizeof(intel_features) / sizeof(intel_features[0]); k++)
		ctrlr->feature_supported[intel_features[k]] = true;
}
/*
 * Rebuild the controller list of supported features from scratch.
 * A fixed set of features is always flagged; the volatile write cache,
 * autonomous power state transition and host memory buffer features
 * depend on the identify data. Intel controllers additionally get
 * their vendor specific features.
 */
static void nvme_ctrlr_set_supported_features(struct nvme_ctrlr *ctrlr)
{
	/* Features flagged unconditionally */
	static const unsigned int always_on[] = {
		NVME_FEAT_ARBITRATION,
		NVME_FEAT_POWER_MANAGEMENT,
		NVME_FEAT_TEMPERATURE_THRESHOLD,
		NVME_FEAT_ERROR_RECOVERY,
		NVME_FEAT_NUMBER_OF_QUEUES,
		NVME_FEAT_INTERRUPT_COALESCING,
		NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION,
		NVME_FEAT_WRITE_ATOMICITY,
		NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
	};
	bool *feat = ctrlr->feature_supported;
	unsigned int k;

	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));

	for (k = 0; k < sizeof(always_on) / sizeof(always_on[0]); k++)
		feat[always_on[k]] = true;

	/* Features depending on the controller identify data */
	if (ctrlr->cdata.vwc.present) {
		feat[NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	}

	if (ctrlr->cdata.apsta.supported) {
		feat[NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
	}

	if (ctrlr->cdata.hmpre) {
		feat[NVME_FEAT_HOST_MEM_BUFFER] = true;
	}

	if (ctrlr->cdata.vid == NVME_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_supported_features(ctrlr);
	}
}
/*
 * Allocate the controller array of I/O queue pairs and put every queue
 * pair on the free list, with IDs starting at 1. The maximum number of
 * entries of an I/O queue is derived from the MQES field of the CAP
 * register, capped to NVME_IO_ENTRIES.
 *
 * Does nothing if the array is already allocated.
 * Returns 0 on success and ENOMEM if the allocation fails.
 */
static int nvme_ctrlr_init_io_qpairs(struct nvme_ctrlr *ctrlr)
{
	union nvme_cap_register cap;
	struct nvme_qpair *qp;
	uint32_t idx;

	if (ctrlr->ioq)
		return 0;

	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
	ctrlr->io_qpairs_max_entries =
		nvme_min(NVME_IO_ENTRIES, (unsigned int)cap.bits.mqes + 1);

	ctrlr->ioq = calloc(ctrlr->io_queues, sizeof(struct nvme_qpair));
	if (ctrlr->ioq == NULL)
		return ENOMEM;

	for (idx = 0; idx < ctrlr->io_queues; idx++) {
		qp = &ctrlr->ioq[idx];
		qp->id = idx + 1;
		TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qp, tailq);
	}

	return 0;
}
/*
 * Request a normal shutdown of the controller through the CC register
 * and poll the CSTS register, sleeping 1000 us between reads, until
 * the shutdown status reports completion or the timeout expires.
 * An error is logged if the shutdown did not complete.
 */
static void nvme_ctrlr_shutdown(struct nvme_ctrlr *ctrlr)
{
#define NVME_CTRLR_SHUTDOWN_TIMEOUT 5000
	union nvme_csts_register csts;
	union nvme_cc_register cc;
	int ms_waited;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
	cc.bits.shn = NVME_SHN_NORMAL;
	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);

	/* At most NVME_CTRLR_SHUTDOWN_TIMEOUT + 1 polling rounds */
	for (ms_waited = 0;
	     csts.bits.shst != NVME_SHST_COMPLETE &&
	     ms_waited <= NVME_CTRLR_SHUTDOWN_TIMEOUT;
	     ms_waited++) {
		nvme_usleep(1000);
		csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
	}

	if (csts.bits.shst != NVME_SHST_COMPLETE) {
		nvme_err("Controller did not shutdown within %d seconds\n",
			 NVME_CTRLR_SHUTDOWN_TIMEOUT / 1000);
	}
}
/*
 * Enable the controller: program the admin queue addresses and sizes,
 * then write the CC register with the enable bit set, the queue entry
 * sizes, the memory page size and the requested arbitration mechanism.
 *
 * Returns 0 on success and EINVAL if the controller is already enabled
 * or if the requested arbitration mechanism is unknown or not
 * advertised in the CAP register. Note that in the latter case, the
 * admin queue registers have already been written.
 */
static int nvme_ctrlr_enable(struct nvme_ctrlr *ctrlr)
{
	union nvme_cap_register cap;
	union nvme_aqa_register aqa;
	union nvme_cc_register cc;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
	if (cc.bits.en != 0) {
		nvme_err("COntroller enable called with CC.EN = 1\n");
		return EINVAL;
	}

	/* Admin queue base addresses */
	nvme_reg_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	nvme_reg_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);

	/* Admin queue sizes (written as entries - 1) */
	aqa.raw = 0;
	aqa.bits.asqs = ctrlr->adminq.entries - 1;
	aqa.bits.acqs = ctrlr->adminq.entries - 1;
	nvme_reg_mmio_write_4(ctrlr, aqa.raw, aqa.raw);

	/* Controller configuration */
	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6;
	cc.bits.iocqes = 4;
	cc.bits.mps = PAGE_SHIFT - 12;

	/* Validate the arbitration mechanism against the capabilities */
	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);

	if (ctrlr->opts.arb_mechanism == NVME_CC_AMS_WRR) {
		if (!(NVME_CAP_AMS_WRR & cap.bits.ams))
			return EINVAL;
	} else if (ctrlr->opts.arb_mechanism == NVME_CC_AMS_VS) {
		if (!(NVME_CAP_AMS_VS & cap.bits.ams))
			return EINVAL;
	} else if (ctrlr->opts.arb_mechanism != NVME_CC_AMS_RR) {
		return EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;

	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);

	return 0;
}
/*
 * Clear the enable bit of the CC register (read-modify-write).
 */
static inline void nvme_ctrlr_disable(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

	cc.bits.en = 0;

	nvme_reg_mmio_write_4(ctrlr, cc.raw, cc.raw);
}
/*
 * Return the current value of the enable bit of the CC register.
 */
static inline int nvme_ctrlr_enabled(struct nvme_ctrlr *ctrlr)
{
	union nvme_cc_register cc;
	int enabled;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);
	enabled = cc.bits.en;

	return enabled;
}
/*
 * Return the current value of the ready bit of the CSTS register.
 */
static inline int nvme_ctrlr_ready(struct nvme_ctrlr *ctrlr)
{
	union nvme_csts_register csts;
	int ready;

	csts.raw = nvme_reg_mmio_read_4(ctrlr, csts.raw);
	ready = csts.bits.rdy;

	return ready;
}
/*
 * Change the controller state and arm the state timeout. The timeout
 * is stored as an absolute time (current time in milliseconds plus
 * timeout_in_ms), unless NVME_TIMEOUT_INFINITE is requested.
 */
static void nvme_ctrlr_set_state(struct nvme_ctrlr *ctrlr,
				 enum nvme_ctrlr_state state,
				 uint64_t timeout_in_ms)
{
	ctrlr->state = state;

	if (timeout_in_ms != NVME_TIMEOUT_INFINITE) {
		ctrlr->state_timeout_ms = nvme_time_msec() + timeout_in_ms;
		return;
	}

	ctrlr->state_timeout_ms = NVME_TIMEOUT_INFINITE;
}
/*
 * Read the controller identify data into ctrlr->cdata and, if the
 * controller reports a maximum data transfer size (MDTS), lower
 * ctrlr->max_xfer_size accordingly.
 *
 * MDTS is a power of two expressed in units of the minimum page size.
 * The limit is computed in 64 bits with a bounded shift count so that
 * large MDTS values cannot trigger an undefined shift or wrap around
 * to a bogus (possibly zero) transfer size.
 *
 * Returns 0 on success or the error returned by the identify command.
 */
static int nvme_ctrlr_identify(struct nvme_ctrlr *ctrlr)
{
	uint64_t mdts_bytes;
	int ret;

	ret = nvme_admin_identify_ctrlr(ctrlr, &ctrlr->cdata);
	if (ret != 0) {
		nvme_notice("Identify controller failed\n");
		return ret;
	}

	/*
	 * With a shift of 32 or more, the limit is at least 2^44 bytes
	 * (the minimum page size is at least 2^12), so MDTS is treated
	 * as not restricting max_xfer_size in that case.
	 * NOTE(review): assumes max_xfer_size never exceeds 2^44 bytes.
	 */
	if (ctrlr->cdata.mdts > 0 && ctrlr->cdata.mdts < 32) {
		mdts_bytes = (uint64_t)ctrlr->min_page_size
			<< ctrlr->cdata.mdts;
		if (mdts_bytes < ctrlr->max_xfer_size)
			ctrlr->max_xfer_size = mdts_bytes;
	}

	return 0;
}
/*
 * Query the current "number of queues" feature and record in
 * ctrlr->max_io_queues the smaller of the submission and completion
 * queue counts. The counts are read from the low and high 16 bits of
 * the command result, each plus one.
 *
 * Returns 0 on success or the error returned by the get feature
 * command.
 */
static int nvme_ctrlr_get_max_io_qpairs(struct nvme_ctrlr *ctrlr)
{
	unsigned int result, nr_sq, nr_cq;
	int err;

	err = nvme_admin_get_feature(ctrlr, NVME_FEAT_CURRENT,
				     NVME_FEAT_NUMBER_OF_QUEUES,
				     0, &result);
	if (err != 0) {
		nvme_notice("Get feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
		return err;
	}

	nr_sq = (result & 0xFFFF) + 1;
	nr_cq = (result >> 16) + 1;

	ctrlr->max_io_queues = nvme_min(nr_sq, nr_cq);

	return 0;
}
/*
 * Request from the controller the number of I/O queues specified in
 * the controller options and record in ctrlr->io_queues the number of
 * queues that can be used: the smaller of the allocated submission
 * and completion queue counts, capped to the requested number.
 *
 * Returns 0 on success or the error returned by the failed admin
 * command.
 */
static int nvme_ctrlr_set_num_qpairs(struct nvme_ctrlr *ctrlr)
{
	unsigned int request, result;
	unsigned int nr_sq, nr_cq;
	int err;

	err = nvme_ctrlr_get_max_io_qpairs(ctrlr);
	if (err != 0) {
		nvme_notice("Failed to get the maximum of I/O qpairs\n");
		return err;
	}

	/* Same count (minus one) in both the high and low 16 bits */
	request = ((ctrlr->opts.io_queues - 1) << 16) |
		(ctrlr->opts.io_queues - 1);

	err = nvme_admin_set_feature(ctrlr, false, NVME_FEAT_NUMBER_OF_QUEUES,
				     request, 0, 0, 0, 0, NULL, 0, &result);
	if (err != 0) {
		nvme_notice("Set feature NVME_FEAT_NUMBER_OF_QUEUES failed\n");
		return err;
	}

	nr_sq = (result & 0xFFFF) + 1;
	nr_cq = (result >> 16) + 1;

	ctrlr->io_queues = nvme_min(nr_sq, nr_cq);
	ctrlr->io_queues = nvme_min(ctrlr->io_queues, ctrlr->opts.io_queues);

	return 0;
}
/*
 * Free the controller namespace array and namespace data buffer, if
 * allocated. The namespace count is reset together with the array.
 */
static void nvme_ctrlr_destruct_namespaces(struct nvme_ctrlr *ctrlr)
{
	if (ctrlr->ns != NULL) {
		free(ctrlr->ns);
		ctrlr->nr_ns = 0;
		ctrlr->ns = NULL;
	}

	if (ctrlr->nsdata != NULL) {
		nvme_free(ctrlr->nsdata);
		ctrlr->nsdata = NULL;
	}
}
/*
 * (Re)build the controller namespaces. The namespace array and data
 * buffer are reallocated only if the number of namespaces reported by
 * the identify data differs from the current count. Every namespace is
 * then constructed, with namespace IDs starting at 1.
 *
 * Returns 0 on success. On failure, all namespace resources are
 * released and -1 is returned.
 */
static int nvme_ctrlr_construct_namespaces(struct nvme_ctrlr *ctrlr)
{
	unsigned int count = ctrlr->cdata.nn;
	unsigned int idx;

	if (count != ctrlr->nr_ns) {
		nvme_ctrlr_destruct_namespaces(ctrlr);

		ctrlr->ns = calloc(count, sizeof(struct nvme_ns));
		if (ctrlr->ns == NULL)
			goto cleanup;

		nvme_debug("Allocate %u namespace data\n", count);
		ctrlr->nsdata = nvme_calloc(count, sizeof(struct nvme_ns_data),
					    PAGE_SIZE);
		if (ctrlr->nsdata == NULL)
			goto cleanup;

		ctrlr->nr_ns = count;
	}

	for (idx = 0; idx < count; idx++) {
		if (nvme_ns_construct(ctrlr, &ctrlr->ns[idx], idx + 1) != 0)
			goto cleanup;
	}

	return 0;

cleanup:
	nvme_ctrlr_destruct_namespaces(ctrlr);

	return -1;
}
/* Defined below; needed by the completion callback to re-arm requests */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
				struct nvme_async_event_request *aer);

/*
 * Completion callback of an asynchronous event request. Unless the
 * request was aborted because of a submission queue deletion, the user
 * callback (if any) is called and the event request is submitted again.
 */
static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_cpl *cpl)
{
	struct nvme_async_event_request *request = arg;
	struct nvme_ctrlr *ctrlr = request->ctrlr;

	/* Aborted request: do not notify and do not resubmit */
	if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION)
		return;

	if (ctrlr->aer_cb_fn)
		ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);

	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, request) != 0)
		nvme_err("Initialize AER failed\n");
}
/*
 * Allocate an asynchronous event request command on the admin queue,
 * bind it to the given event request descriptor and submit it.
 *
 * Returns -1 if the request cannot be allocated, or the result of the
 * request submission otherwise.
 */
static int nvme_ctrlr_construct_and_submit_aer(struct nvme_ctrlr *ctrlr,
				struct nvme_async_event_request *aer)
{
	struct nvme_qpair *adminq = &ctrlr->adminq;
	struct nvme_request *request;

	request = nvme_request_allocate_null(adminq,
					     nvme_ctrlr_async_event_cb, aer);
	if (!request)
		return -1;

	request->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;

	aer->req = request;
	aer->ctrlr = ctrlr;

	return nvme_qpair_submit_request(adminq, request);
}
static int nvme_ctrlr_configure_aer(struct nvme_ctrlr *ctrlr)
{
union nvme_critical_warning_state state;
struct nvme_async_event_request *aer;
unsigned int i;
int ret;
state.raw = 0xFF;
state.bits.reserved = 0;
ret = nvme_admin_set_feature(ctrlr, false,
NVME_FEAT_ASYNC_EVENT_CONFIGURATION,
state.raw, 0, 0, 0, 0, NULL, 0, NULL);
if (ret != 0) {
nvme_notice("Set feature ASYNC_EVENT_CONFIGURATION failed\n");
return ret;
}
ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS,
(ctrlr->cdata.aerl + 1));
for (i = 0; i < ctrlr->num_aers; i++) {
aer = &ctrlr->aer[i];
if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
nvme_notice("Construct AER failed\n");
return -1;
}
}
return 0;
}
/*
 * Start the controller once it is ready: reset and enable the admin
 * queue, identify the controller, negotiate the number of I/O queues,
 * initialize the I/O queue pairs and the namespaces, then set up
 * asynchronous events, supported log pages and supported features.
 *
 * A failure to configure asynchronous events only generates a warning.
 * Returns 0 on success and -1 if any of the other steps fails.
 */
static int nvme_ctrlr_start(struct nvme_ctrlr *ctrlr)
{
	nvme_qpair_reset(&ctrlr->adminq);
	nvme_qpair_enable(&ctrlr->adminq);

	/* Mandatory steps, executed in order until the first failure */
	if (nvme_ctrlr_identify(ctrlr) != 0 ||
	    nvme_ctrlr_set_num_qpairs(ctrlr) != 0 ||
	    nvme_ctrlr_init_io_qpairs(ctrlr) ||
	    nvme_ctrlr_construct_namespaces(ctrlr) != 0)
		return -1;

	if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
		nvme_warning("controller does not support AER!\n");
	}

	nvme_ctrlr_set_supported_log_pages(ctrlr);
	nvme_ctrlr_set_supported_features(ctrlr);

	if (ctrlr->cdata.sgls.supported) {
		ctrlr->flags |= NVME_CTRLR_SGL_SUPPORTED;
	}

	return 0;
}
/*
 * Map the controller memory buffer (CMB), if the controller has one.
 * The CMB location and size are read from the CMBLOC and CMBSZ
 * registers and checked against the size of the PCI BAR containing the
 * buffer. On success, the mapping address, BAR physical address, size
 * and offset of the CMB are recorded in the controller. If the CMB
 * cannot be used, or does not support submission queues, the use of
 * CMB submission queues is disabled in the controller options.
 */
static void nvme_ctrlr_map_cmb(struct nvme_ctrlr *ctrlr)
{
	union nvme_cmbloc_register cmbloc;
	union nvme_cmbsz_register cmbsz;
	uint64_t bar_phys_addr, bar_size;
	uint64_t unit_size, size, offset;
	void *vaddr;
	uint32_t bir;
	int err;

	cmbsz.raw = nvme_reg_mmio_read_4(ctrlr, cmbsz.raw);
	cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);

	/* A zero size means there is no CMB */
	if (!cmbsz.bits.sz)
		goto no_cmb;

	/* Reject BAR indicator values that are not accepted */
	bir = cmbloc.bits.bir;
	if (bir == 1 || bir > 5)
		goto no_cmb;

	/* Size and offset are expressed in units of 4 KiB * 16^szu */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
	size = unit_size * cmbsz.bits.sz;
	offset = unit_size * cmbloc.bits.ofst;

	nvme_pcicfg_get_bar_addr_len(ctrlr->pci_dev, bir, &bar_phys_addr,
				     &bar_size);

	/* The CMB must fit entirely within the BAR */
	if (offset > bar_size || size > bar_size - offset)
		goto no_cmb;

	err = nvme_pcicfg_map_bar_write_combine(ctrlr->pci_dev, bir, &vaddr);
	if (err != 0 || vaddr == NULL)
		goto no_cmb;

	ctrlr->cmb_bar_virt_addr = vaddr;
	ctrlr->cmb_bar_phys_addr = bar_phys_addr;
	ctrlr->cmb_size = size;
	ctrlr->cmb_current_offset = offset;

	if (!cmbsz.bits.sqs)
		ctrlr->opts.use_cmb_sqs = false;

	return;

no_cmb:
	ctrlr->cmb_bar_virt_addr = NULL;
	ctrlr->opts.use_cmb_sqs = false;
}
/*
 * Unmap the controller memory buffer, if it was mapped. The BAR to
 * unmap is read from the CMBLOC register.
 *
 * Returns 0 if nothing is mapped, or the result of the BAR unmapping.
 */
static int nvme_ctrlr_unmap_cmb(struct nvme_ctrlr *ctrlr)
{
	union nvme_cmbloc_register cmbloc;
	void *vaddr = ctrlr->cmb_bar_virt_addr;

	if (!vaddr)
		return 0;

	cmbloc.raw = nvme_reg_mmio_read_4(ctrlr, cmbloc.raw);

	return nvme_pcicfg_unmap_bar(ctrlr->pci_dev, cmbloc.bits.bir,
				     vaddr);
}
/*
 * Map the controller registers (PCI BAR 0) and, if present, the
 * controller memory buffer.
 *
 * Returns 0 on success. If the BAR mapping fails, the mapping error is
 * returned. If the mapping call reports success but does not provide
 * an address, EIO is returned so that callers never see success with
 * ctrlr->regs left unset.
 */
static int nvme_ctrlr_map_bars(struct nvme_ctrlr *ctrlr)
{
	void *addr = NULL;
	int ret;

	ret = nvme_pcicfg_map_bar(ctrlr->pci_dev, 0, 0, &addr);
	if (ret == 0 && addr == NULL)
		ret = EIO;
	if (ret != 0) {
		nvme_err("Map PCI device bar failed %d (%s)\n",
			 ret, strerror(ret));
		return ret;
	}

	nvme_debug("Controller BAR mapped at %p\n", addr);

	ctrlr->regs = (volatile struct nvme_registers *)addr;

	/* CMB mapping failures are not fatal: the CMB is simply not used */
	nvme_ctrlr_map_cmb(ctrlr);

	return 0;
}
/*
 * Unmap the controller memory buffer and the controller registers
 * (PCI BAR 0), in that order, stopping at the first failure.
 *
 * Returns 0 on success or the error of the failed unmapping.
 */
static int nvme_ctrlr_unmap_bars(struct nvme_ctrlr *ctrlr)
{
	void *regs = (void *)ctrlr->regs;
	int err;

	err = nvme_ctrlr_unmap_cmb(ctrlr);
	if (err != 0) {
		nvme_err("Unmap controller side buffer failed %d\n", err);
		return err;
	}

	/* Registers were never mapped: nothing more to do */
	if (!regs)
		return 0;

	err = nvme_pcicfg_unmap_bar(ctrlr->pci_dev, 0, regs);
	if (err != 0) {
		nvme_err("Unmap PCI device bar failed %d\n", err);
		return err;
	}

	return 0;
}
/*
 * Mark the controller as failed and fail the admin queue pair as well
 * as all I/O queue pairs (if they are allocated).
 */
static void nvme_ctrlr_fail(struct nvme_ctrlr *ctrlr)
{
	unsigned int q;

	ctrlr->failed = true;

	nvme_qpair_fail(&ctrlr->adminq);

	if (!ctrlr->ioq)
		return;

	for (q = 0; q < ctrlr->io_queues; q++) {
		nvme_qpair_fail(&ctrlr->ioq[q]);
	}
}
/*
 * Execute one step of the controller initialization state machine,
 * according to the current controller state. The caller is expected to
 * call this function repeatedly until the controller state becomes
 * NVME_CTRLR_STATE_READY or an error is returned.
 *
 * Returns 0 if the step succeeded or if the awaited condition is not
 * yet met, the error of nvme_ctrlr_enable() or nvme_ctrlr_start() if
 * one of these fails, and -1 if the state is unknown or if the
 * current state timed out (the controller is then marked as failed).
 */
static int nvme_ctrlr_init(struct nvme_ctrlr *ctrlr)
{
unsigned int ready_timeout_in_ms = nvme_ctrlr_get_ready_to_in_ms(ctrlr);
int ret;
switch (ctrlr->state) {
case NVME_CTRLR_STATE_INIT:
if (nvme_ctrlr_enabled(ctrlr)) {
/*
 * CC.EN is set but the controller is not ready yet: wait for
 * it to become ready before disabling it.
 */
if (!nvme_ctrlr_ready(ctrlr)) {
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
ready_timeout_in_ms);
return 0;
}
/* Enabled and ready: disable and wait for ready to clear */
nvme_ctrlr_disable(ctrlr);
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
ready_timeout_in_ms);
if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
nvme_msleep(2000);
return 0;
}
/* CC.EN is clear but ready is still set: wait for it to clear */
if (nvme_ctrlr_ready(ctrlr)) {
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
ready_timeout_in_ms);
return 0;
}
/* Disabled and not ready: the controller can be enabled now */
ret = nvme_ctrlr_enable(ctrlr);
if (ret)
nvme_err("Enable controller failed\n");
else
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
ready_timeout_in_ms);
return ret;
case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
/* Once ready is set, disable and wait for ready to clear */
if (nvme_ctrlr_ready(ctrlr)) {
nvme_ctrlr_disable(ctrlr);
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
ready_timeout_in_ms);
return 0;
}
break;
case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
/* Once ready is clear, enable and wait for ready to be set */
if (!nvme_ctrlr_ready(ctrlr)) {
ret = nvme_ctrlr_enable(ctrlr);
if (ret)
nvme_err("Enable controller failed\n");
else
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
ready_timeout_in_ms);
return ret;
}
break;
case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
/* Once ready is set, start the controller */
if (nvme_ctrlr_ready(ctrlr)) {
if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_RDY)
nvme_msleep(2000);
ret = nvme_ctrlr_start(ctrlr);
if (ret)
nvme_err("Start controller failed\n");
else
nvme_ctrlr_set_state(ctrlr,
NVME_CTRLR_STATE_READY,
NVME_TIMEOUT_INFINITE);
return ret;
}
break;
default:
nvme_panic("Unhandled ctrlr state %d\n", ctrlr->state);
nvme_ctrlr_fail(ctrlr);
return -1;
}
/*
 * Reached only when still waiting for a ready bit transition: check
 * that the deadline armed by nvme_ctrlr_set_state() has not passed.
 */
if ((ctrlr->state_timeout_ms != NVME_TIMEOUT_INFINITE) &&
(nvme_time_msec() > ctrlr->state_timeout_ms)) {
nvme_err("Initialization timed out in state %d\n",
ctrlr->state);
nvme_ctrlr_fail(ctrlr);
return -1;
}
return 0;
}
/*
 * Reset the controller: disable all queue pairs, run the
 * initialization state machine until the controller is ready again and
 * re-create all active I/O queue pairs on the controller.
 *
 * Nothing is done (and 0 is returned) if a reset is already in
 * progress or if the controller is marked as failed. Otherwise,
 * returns 0 on success and -1 if the controller ended up failed.
 */
static int nvme_ctrlr_reset(struct nvme_ctrlr *ctrlr)
{
	struct nvme_qpair *qp;
	bool initialized = true;
	unsigned int q;

	if (ctrlr->failed || ctrlr->resetting)
		return 0;

	ctrlr->resetting = true;

	/* Stop all queues */
	nvme_qpair_disable(&ctrlr->adminq);
	for (q = 0; q < ctrlr->io_queues; q++) {
		nvme_qpair_disable(&ctrlr->ioq[q]);
	}

	/* Restart the initialization state machine */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT,
			     NVME_TIMEOUT_INFINITE);
	while (initialized && ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_init(ctrlr) != 0) {
			nvme_crit("Controller reset failed\n");
			nvme_ctrlr_fail(ctrlr);
			initialized = false;
		}
	}

	/* Re-create the I/O queue pairs that were in use */
	if (initialized) {
		TAILQ_FOREACH(qp, &ctrlr->active_io_qpairs, tailq) {
			if (nvme_ctrlr_create_qpair(ctrlr, qp) != 0)
				nvme_ctrlr_fail(ctrlr);
		}
	}

	ctrlr->resetting = false;

	return ctrlr->failed ? -1 : 0;
}
/*
 * Initialize the controller options from the user supplied options, or
 * with all-zero options if none are given. The number of I/O queues
 * defaults to DEFAULT_MAX_IO_QUEUES when left at 0 and is capped to
 * NVME_MAX_IO_QUEUES.
 */
static void nvme_ctrlr_set_opts(struct nvme_ctrlr *ctrlr,
				struct nvme_ctrlr_opts *opts)
{
	if (opts == NULL)
		memset(&ctrlr->opts, 0, sizeof(struct nvme_ctrlr_opts));
	else
		memcpy(&ctrlr->opts, opts, sizeof(struct nvme_ctrlr_opts));

	if (ctrlr->opts.io_queues == 0) {
		ctrlr->opts.io_queues = DEFAULT_MAX_IO_QUEUES;
	}

	if (ctrlr->opts.io_queues <= NVME_MAX_IO_QUEUES)
		return;

	nvme_info("Limiting requested I/O queues %u to %d\n",
		  ctrlr->opts.io_queues, NVME_MAX_IO_QUEUES);
	ctrlr->opts.io_queues = NVME_MAX_IO_QUEUES;
}
/*
 * Attach to the NVMe controller of a PCI device: allocate and
 * initialize a controller handle, map the controller registers, set
 * bits 0x0404 in the PCI configuration dword at offset 4, construct
 * the admin queue pair and run the initialization state machine until
 * the controller is ready.
 *
 * opts may be NULL, in which case default options are used.
 * Returns the controller handle on success and NULL on failure.
 */
struct nvme_ctrlr *
nvme_ctrlr_attach(struct pci_device *pci_dev,
		  struct nvme_ctrlr_opts *opts)
{
	union nvme_cap_register cap;
	struct nvme_ctrlr *ctrlr;
	uint32_t pci_cmd;
	int err;

	ctrlr = malloc(sizeof(struct nvme_ctrlr));
	if (ctrlr == NULL) {
		nvme_err("Allocate controller handle failed\n");
		return NULL;
	}

	nvme_debug("New controller handle %p\n", ctrlr);

	/* Software state of the handle */
	memset(ctrlr, 0, sizeof(struct nvme_ctrlr));
	ctrlr->pci_dev = pci_dev;
	ctrlr->resetting = false;
	ctrlr->failed = false;
	TAILQ_INIT(&ctrlr->free_io_qpairs);
	TAILQ_INIT(&ctrlr->active_io_qpairs);
	pthread_mutex_init(&ctrlr->lock, NULL);
	ctrlr->quirks = nvme_ctrlr_get_quirks(pci_dev);

	nvme_ctrlr_set_state(ctrlr,
			     NVME_CTRLR_STATE_INIT,
			     NVME_TIMEOUT_INFINITE);

	err = nvme_ctrlr_map_bars(ctrlr);
	if (err != 0) {
		/* Nothing else to undo at this point */
		nvme_err("Map controller BAR failed\n");
		pthread_mutex_destroy(&ctrlr->lock);
		free(ctrlr);
		return NULL;
	}

	/* Update the PCI configuration dword at offset 4 */
	nvme_pcicfg_read32(pci_dev, &pci_cmd, 4);
	pci_cmd |= 0x0404;
	nvme_pcicfg_write32(pci_dev, pci_cmd, 4);

	/* Parameters derived from the controller capabilities */
	cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw);
	ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
	ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);
	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	err = nvme_qpair_construct(ctrlr, &ctrlr->adminq, 0,
				   NVME_ADMIN_ENTRIES, NVME_ADMIN_TRACKERS);
	if (err != 0) {
		nvme_err("Initialize admin queue pair failed\n");
		goto fail;
	}

	nvme_ctrlr_set_opts(ctrlr, opts);

	/* Step the state machine until the controller is ready */
	for (;;) {
		err = nvme_ctrlr_init(ctrlr);
		if (err)
			goto fail;
		if (ctrlr->state == NVME_CTRLR_STATE_READY)
			break;
	}

	return ctrlr;

fail:
	nvme_ctrlr_detach(ctrlr);

	return NULL;
}
void nvme_ctrlr_detach(struct nvme_ctrlr *ctrlr)
{
struct nvme_qpair *qpair;
uint32_t i;
while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
nvme_ioqp_release(qpair);
}
nvme_ctrlr_shutdown(ctrlr);
nvme_ctrlr_destruct_namespaces(ctrlr);
if (ctrlr->ioq) {
for (i = 0; i < ctrlr->io_queues; i++)
nvme_qpair_destroy(&ctrlr->ioq[i]);
free(ctrlr->ioq);
}
nvme_qpair_destroy(&ctrlr->adminq);
nvme_ctrlr_unmap_bars(ctrlr);
pthread_mutex_destroy(&ctrlr->lock);
free(ctrlr);
}
/*
 * Get a controller feature, with the controller lock held while the
 * admin command is executed.
 *
 * Returns the result of the get feature admin command.
 */
int nvme_ctrlr_get_feature(struct nvme_ctrlr *ctrlr,
			   enum nvme_feat_sel sel, enum nvme_feat feature,
			   uint32_t cdw11,
			   uint32_t *attributes)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_get_feature(ctrlr, sel, feature, cdw11, attributes);
	if (err) {
		nvme_notice("Get feature 0x%08x failed\n",
			    (unsigned int) feature);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Set a controller feature, with the controller lock held while the
 * admin command is executed.
 *
 * Returns the result of the set feature admin command.
 */
int nvme_ctrlr_set_feature(struct nvme_ctrlr *ctrlr,
			   bool save, enum nvme_feat feature,
			   uint32_t cdw11, uint32_t cdw12,
			   uint32_t cdw13, uint32_t cdw14, uint32_t cdw15,
			   void *buf, uint32_t len,
			   uint32_t *attributes)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_set_feature(ctrlr, save, feature,
				     cdw11, cdw12, cdw13, cdw14, cdw15,
				     buf, len, attributes);
	if (err) {
		nvme_notice("Set feature 0x%08x failed\n",
			    (unsigned int) feature);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Attach a namespace to the controllers in clist and, on success,
 * reset the controller. The controller lock is held for the duration
 * of the operation.
 *
 * Returns 0 on success, or the error of the attach command or of the
 * controller reset.
 */
int nvme_ctrlr_attach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_ctrlr_list *clist)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_attach_ns(ctrlr, nsid, clist);
	if (err == 0) {
		err = nvme_ctrlr_reset(ctrlr);
		if (err != 0)
			nvme_notice("Reset controller failed\n");
	} else {
		nvme_notice("Attach namespace %u failed\n", nsid);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Detach a namespace from the controllers in clist and, on success,
 * reset the controller. The controller lock is held for the duration
 * of the operation.
 *
 * Returns 0 on success, or the error of the detach command or of the
 * controller reset.
 */
int nvme_ctrlr_detach_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_ctrlr_list *clist)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_detach_ns(ctrlr, nsid, clist);
	if (err == 0) {
		err = nvme_ctrlr_reset(ctrlr);
		if (err != 0)
			nvme_notice("Reset controller failed\n");
	} else {
		nvme_notice("Detach namespace %u failed\n", nsid);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Create a namespace described by nsdata, with the controller lock
 * held while the admin command is executed.
 *
 * Returns the ID of the new namespace, or 0 on failure.
 */
unsigned int nvme_ctrlr_create_ns(struct nvme_ctrlr *ctrlr,
				  struct nvme_ns_data *nsdata)
{
	unsigned int new_nsid;

	pthread_mutex_lock(&ctrlr->lock);

	if (nvme_admin_create_ns(ctrlr, nsdata, &new_nsid) != 0) {
		nvme_notice("Create namespace failed\n");
		new_nsid = 0;
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return new_nsid;
}
/*
 * Delete a namespace and, on success, reset the controller. The
 * controller lock is held for the duration of the operation.
 *
 * Returns 0 on success, or the error of the delete command or of the
 * controller reset.
 */
int nvme_ctrlr_delete_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_delete_ns(ctrlr, nsid);
	if (err == 0) {
		err = nvme_ctrlr_reset(ctrlr);
		if (err != 0)
			nvme_notice("Reset controller failed\n");
	} else {
		nvme_notice("Delete namespace %u failed\n", nsid);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Format a namespace (or the entire device if nsid is
 * NVME_GLOBAL_NS_TAG) and, on success, reset the controller. The
 * controller lock is held for the duration of the operation.
 *
 * Returns 0 on success, or the error of the format command or of the
 * controller reset.
 */
int nvme_ctrlr_format_ns(struct nvme_ctrlr *ctrlr, unsigned int nsid,
			 struct nvme_format *format)
{
	int err;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_admin_format_nvm(ctrlr, nsid, format);
	if (err == 0) {
		err = nvme_ctrlr_reset(ctrlr);
		if (err != 0)
			nvme_notice("Reset controller failed\n");
	} else if (nsid != NVME_GLOBAL_NS_TAG) {
		nvme_notice("Format namespace %u failed\n", nsid);
	} else {
		nvme_notice("Format device failed\n");
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Update the controller firmware: download the firmware image in
 * chunks of at most the controller minimum page size, commit the image
 * to the given slot (replace image action) and reset the controller.
 * The controller lock is held for the duration of the operation.
 *
 * size must be a multiple of 4 and must be representable as an
 * unsigned int, as the download offset and remaining size are tracked
 * with that type.
 *
 * Returns 0 on success, EINVAL if size is invalid, or the error of the
 * failed download, commit or reset.
 */
int nvme_ctrlr_update_firmware(struct nvme_ctrlr *ctrlr,
			       void *fw, size_t size, int slot)
{
	struct nvme_fw_commit fw_commit;
	unsigned int size_remaining, offset = 0, transfer;
	/* Byte pointer: arithmetic on void * is not standard C */
	uint8_t *f = fw;
	int ret;

	/* Reject unaligned sizes and sizes that would be truncated */
	if ((size & 0x3) || (size_t)(unsigned int)size != size) {
		nvme_err("Invalid firmware size\n");
		return EINVAL;
	}
	size_remaining = (unsigned int)size;

	pthread_mutex_lock(&ctrlr->lock);

	/* Download the image chunk by chunk */
	while (size_remaining > 0) {
		transfer = nvme_min(size_remaining, ctrlr->min_page_size);

		ret = nvme_admin_fw_image_dl(ctrlr, f, transfer, offset);
		if (ret != 0) {
			nvme_err("Download FW (%u B at %u) failed\n",
				 transfer, offset);
			goto out;
		}

		f += transfer;
		offset += transfer;
		size_remaining -= transfer;
	}

	/* Commit the downloaded image to the requested slot */
	memset(&fw_commit, 0, sizeof(struct nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = NVME_FW_COMMIT_REPLACE_IMG;

	ret = nvme_admin_fw_commit(ctrlr, &fw_commit);
	if (ret != 0) {
		nvme_err("Commit downloaded FW (%zu B) failed\n",
			 size);
		goto out;
	}

	ret = nvme_ctrlr_reset(ctrlr);
	if (ret)
		nvme_notice("Reset controller failed\n");

out:
	pthread_mutex_unlock(&ctrlr->lock);

	return ret;
}
/*
 * Get an I/O queue pair from the controller free list, construct it
 * with the given priority and queue depth, create it on the controller
 * and move it to the list of active queue pairs.
 *
 * qprio must fit in 2 bits and must be NVME_QPRIO_URGENT when the
 * controller is configured for round robin arbitration. A queue depth
 * of 1 is invalid; a depth of 0 or larger than the controller maximum
 * selects the controller maximum.
 *
 * Returns the queue pair, or NULL on failure.
 */
struct nvme_qpair *nvme_ioqp_get(struct nvme_ctrlr *ctrlr,
				 enum nvme_qprio qprio, unsigned int qd)
{
	struct nvme_qpair *qpair = NULL;
	union nvme_cc_register cc;
	uint32_t trackers;

	cc.raw = nvme_reg_mmio_read_4(ctrlr, cc.raw);

	/* Parameter validation */
	if ((qprio & 3) != qprio)
		return NULL;

	if ((qprio != NVME_QPRIO_URGENT) && (cc.bits.ams == NVME_CC_AMS_RR)) {
		nvme_err("Invalid queue priority for default round "
			 "robin arbitration method\n");
		return NULL;
	}

	if (qd == 1) {
		nvme_err("Invalid queue depth\n");
		return NULL;
	}

	if (qd == 0 || qd > ctrlr->io_qpairs_max_entries)
		qd = ctrlr->io_qpairs_max_entries;
	trackers = nvme_min(NVME_IO_TRACKERS, (qd - 1));

	pthread_mutex_lock(&ctrlr->lock);

	qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
	if (qpair == NULL) {
		nvme_err("No free I/O queue pairs\n");
	} else if (nvme_qpair_construct(ctrlr, qpair, qprio, qd,
					trackers) != 0) {
		nvme_qpair_destroy(qpair);
		qpair = NULL;
	} else if (nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
		nvme_err("Create queue pair on the controller failed\n");
		nvme_qpair_destroy(qpair);
		qpair = NULL;
	} else {
		/* The queue pair is now in use */
		TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
		TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return qpair;
}
/*
 * Release an I/O queue pair: delete it on the controller and, if that
 * succeeds, move it from the active list to the head of the free list.
 * The queue pair stays on the active list if the deletion fails.
 *
 * Returns 0 on success (or if qpair is NULL), or the error of the
 * queue pair deletion.
 */
int nvme_ioqp_release(struct nvme_qpair *qpair)
{
	struct nvme_ctrlr *ctrlr;
	int err;

	if (!qpair)
		return 0;

	ctrlr = qpair->ctrlr;

	pthread_mutex_lock(&ctrlr->lock);

	err = nvme_ctrlr_delete_qpair(ctrlr, qpair);
	if (err == 0) {
		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
		TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);
	} else {
		nvme_notice("Delete queue pair %u failed\n", qpair->id);
	}

	pthread_mutex_unlock(&ctrlr->lock);

	return err;
}
/*
 * Submit a raw command to an I/O queue pair, using buf (len bytes) as
 * a contiguous data buffer. The command is copied into the request,
 * and cb_fn and cb_arg are passed on to the request allocation.
 *
 * Returns ENOMEM if no request can be allocated, or the result of the
 * request submission otherwise.
 */
int nvme_ioqp_submit_cmd(struct nvme_qpair *qpair,
			 struct nvme_cmd *cmd,
			 void *buf, size_t len,
			 nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *request;

	request = nvme_request_allocate_contig(qpair, buf, len,
					       cb_fn, cb_arg);
	if (!request)
		return ENOMEM;

	memcpy(&request->cmd, cmd, sizeof(request->cmd));

	return nvme_qpair_submit_request(qpair, request);
}