root/sys/dev/nvme/nvme_sim.c
/*-
 * Copyright (c) 2016 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/smp.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_debug.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "nvme_private.h"

#include "nvme_if.h"

/*
 * Shorthand names that expand to spriv_ptr0 / spriv_ptr1.  Neither alias is
 * referenced by the code visible in this file.
 */
#define ccb_accb_ptr spriv_ptr0
#define ccb_ctrlr_ptr spriv_ptr1
/* Forward declarations for functions used before their definitions below. */
static void     nvme_sim_action(struct cam_sim *sim, union ccb *ccb);
static void     nvme_sim_poll(struct cam_sim *sim);
static int      nvme_sim_ns_added(device_t dev, struct nvme_namespace *ns);
static int      nvme_sim_ns_changed(device_t dev, uint32_t nsid);
static int      nvme_sim_ns_removed(device_t dev, struct nvme_namespace *ns);

/*
 * sim2softc: recover our softc from a CAM SIM (the softc is what
 * nvme_sim_attach() hands to cam_sim_alloc()).
 * sim2ctrlr: the NVMe controller recorded in that softc.
 */
#define sim2softc(sim)  ((struct nvme_sim_softc *)cam_sim_softc(sim))
#define sim2ctrlr(sim)  (sim2softc(sim)->s_ctrlr)

/*
 * Per-device state for the nvme_sim driver.  Its size is given to newbus via
 * nvme_sim_driver, and the same structure is installed as the CAM SIM's
 * softc so that sim2softc() can get back to it.
 */
struct nvme_sim_softc
{
        /* Controller obtained from the device ivars in nvme_sim_attach(). */
        struct nvme_controller  *s_ctrlr;
        /* SIM allocated by cam_sim_alloc() in nvme_sim_attach(). */
        struct cam_sim          *s_sim;
        /* Wildcard target/LUN path created on s_sim's bus at attach time. */
        struct cam_path         *s_path;
};

/*
 * Completion callback for requests queued by nvme_sim_nvmeio().
 *
 * ccb_arg is the CCB that was handed to the request allocator; cpl is the
 * NVMe completion for that request.  The completion is copied into the CCB
 * verbatim so the peripheral driver can interpret it; only a coarse
 * success/error status is derived here (no error-log page is fetched).
 */
static void
nvme_sim_nvmeio_done(void *ccb_arg, const struct nvme_completion *cpl)
{
        union ccb *done_ccb = ccb_arg;

        /* Hand the raw completion to the periph and drop the queued marker. */
        memcpy(&done_ccb->nvmeio.cpl, cpl, sizeof(*cpl));
        done_ccb->ccb_h.status &= ~CAM_SIM_QUEUED;

        if (!nvme_completion_is_error(cpl)) {
                /* Successful commands take the direct completion path. */
                done_ccb->ccb_h.status = CAM_REQ_CMP;
                xpt_done_direct(done_ccb);
                return;
        }

        /* Failed commands are completed through the regular xpt_done(). */
        done_ccb->ccb_h.status = CAM_NVME_STATUS_ERROR;
        xpt_done(done_ccb);
}

/*
 * Turn an XPT_NVME_IO or XPT_NVME_ADMIN CCB into an nvme_request and submit
 * it to the controller behind this SIM.
 *
 * The request flavour is picked from how the CCB describes its data: a bio,
 * a scatter/gather CCB, no data at all, or a plain virtual address plus
 * length.  All allocations are M_NOWAIT; if one fails the CCB is completed
 * immediately with CAM_RESRC_UNAVAIL.  Otherwise the CCB is marked
 * CAM_SIM_QUEUED and is finished later by nvme_sim_nvmeio_done().
 */
static void
nvme_sim_nvmeio(struct cam_sim *sim, union ccb *ccb)
{
        struct ccb_nvmeio       *nio = &ccb->nvmeio;
        struct nvme_controller  *ctrlr = sim2ctrlr(sim);
        void                    *data = nio->data_ptr;
        uint32_t                len = nio->dxfer_len;
        uint32_t                flags = nio->ccb_h.flags;
        struct nvme_request     *req;

        /* SG LIST ??? */
        if ((flags & CAM_DATA_MASK) == CAM_DATA_BIO) {
                req = nvme_allocate_request_bio((struct bio *)data,
                    M_NOWAIT, nvme_sim_nvmeio_done, ccb);
        } else if ((flags & CAM_DATA_SG) == CAM_DATA_SG) {
                req = nvme_allocate_request_ccb(ccb, M_NOWAIT,
                    nvme_sim_nvmeio_done, ccb);
        } else if (data == NULL) {
                req = nvme_allocate_request_null(M_NOWAIT,
                    nvme_sim_nvmeio_done, ccb);
        } else {
                req = nvme_allocate_request_vaddr(data, len, M_NOWAIT,
                    nvme_sim_nvmeio_done, ccb);
        }

        if (req == NULL) {
                /* No request available right now; report it to CAM. */
                nio->ccb_h.status = CAM_RESRC_UNAVAIL;
                xpt_done(ccb);
                return;
        }

        ccb->ccb_h.status |= CAM_SIM_QUEUED;

        /* The submission entry is supplied by the CCB's originator. */
        memcpy(&req->cmd, &ccb->nvmeio.cmd, sizeof(ccb->nvmeio.cmd));

        /* Anything that is not XPT_NVME_IO goes to the admin queue. */
        if (ccb->ccb_h.func_code != XPT_NVME_IO) {
                nvme_ctrlr_submit_admin_request(ctrlr, req);
                return;
        }
        nvme_ctrlr_submit_io_request(ctrlr, req);
}

/*
 * Estimate the controller's link bandwidth from its PCIe Link Status
 * register: a per-lane figure selected by the link speed field, multiplied
 * by the negotiated lane count.
 *
 * If the speed field is outside the table, entry 0 (value 1) is used, so the
 * result degenerates to the lane count.  That is not accurate, but can still
 * be useful diagnostically.
 */
static uint32_t
nvme_link_kBps(struct nvme_controller *ctrlr)
{
        static const uint32_t per_lane[] =
            { 1, 250000, 500000, 985000, 1970000 };
        uint32_t link_sta, gen, width;

        link_sta = pcie_read_config(ctrlr->dev, PCIER_LINK_STA, 2);
        gen = link_sta & PCIEM_LINK_STA_SPEED;
        width = (link_sta & PCIEM_LINK_STA_WIDTH) >> 4;

        /* Failsafe for a speed indicator we have no table entry for. */
        if (gen >= nitems(per_lane))
                gen = 0;

        return (per_lane[gen] * width);
}

/*
 * CAM action entry point for this SIM (registered through cam_sim_alloc() in
 * nvme_sim_attach()).  Dispatches on the CCB's function code.
 *
 * Most cases just fill in the CCB, set ccb_h.status and fall out of the
 * switch to the common xpt_done() at the bottom.  The two I/O cases
 * (XPT_NVME_IO and XPT_NVME_ADMIN) return early once the request has been
 * handed to nvme_sim_nvmeio(), because that path completes the CCB itself:
 * immediately on allocation failure, otherwise from nvme_sim_nvmeio_done().
 */
static void
nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
{
        struct nvme_controller *ctrlr;

        CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
            ("nvme_sim_action: func= %#x\n",
                ccb->ccb_h.func_code));

        ctrlr = sim2ctrlr(sim);

        switch (ccb->ccb_h.func_code) {
        case XPT_CALC_GEOMETRY:         /* Calculate Geometry Totally nuts ? XXX */
                /*
                 * Only meaningful for old-school SCSI disks since only the SCSI
                 * da driver generates them. Reject all these that slip through.
                 */
                /*FALLTHROUGH*/
        case XPT_ABORT:                 /* Abort the specified CCB */
                ccb->ccb_h.status = CAM_REQ_INVALID;
                break;
        case XPT_SET_TRAN_SETTINGS:
                /*
                 * NVMe doesn't really have different transfer settings, but
                 * other parts of CAM think failure here is a big deal.
                 */
                ccb->ccb_h.status = CAM_REQ_CMP;
                break;
        case XPT_PATH_INQ:              /* Path routing inquiry */
        {
                struct ccb_pathinq      *cpi = &ccb->cpi;
                device_t                dev = ctrlr->dev;

                /*
                 * For devices that are reported as children of the AHCI
                 * controller, which has no access to the config space for this
                 * controller, report the AHCI controller's data.
                 */
                if (ctrlr->quirks & QUIRK_AHCI)
                        dev = device_get_parent(dev);
                cpi->version_num = 1;
                cpi->hba_inquiry = 0;
                cpi->target_sprt = 0;
                cpi->hba_misc =  PIM_UNMAPPED | PIM_NOSCAN;
                cpi->hba_eng_cnt = 0;
                cpi->max_target = 0;
                /* Namespaces are presented as LUNs, so nn bounds the LUN space. */
                cpi->max_lun = ctrlr->cdata.nn;
                cpi->maxio = ctrlr->max_xfer_size;
                cpi->initiator_id = 0;
                cpi->bus_id = cam_sim_bus(sim);
                cpi->base_transfer_speed = nvme_link_kBps(ctrlr);
                strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
                strlcpy(cpi->hba_vid, "NVMe", HBA_IDLEN);
                strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
                cpi->unit_number = cam_sim_unit(sim);
                cpi->transport = XPORT_NVME;            /* XXX XPORT_PCIE ? */
                /* Both version fields carry the raw value of the vs register. */
                cpi->transport_version = nvme_mmio_read_4(ctrlr, vs);
                cpi->protocol = PROTO_NVME;
                cpi->protocol_version = nvme_mmio_read_4(ctrlr, vs);
                cpi->xport_specific.nvme.nsid = xpt_path_lun_id(ccb->ccb_h.path);
                cpi->xport_specific.nvme.domain = pci_get_domain(dev);
                cpi->xport_specific.nvme.bus = pci_get_bus(dev);
                cpi->xport_specific.nvme.slot = pci_get_slot(dev);
                cpi->xport_specific.nvme.function = pci_get_function(dev);
                cpi->xport_specific.nvme.progif = pci_get_progif(dev);
                strlcpy(cpi->xport_specific.nvme.dev_name, device_get_nameunit(dev),
                    sizeof(cpi->xport_specific.nvme.dev_name));
                cpi->hba_vendor = pci_get_vendor(dev);
                cpi->hba_device = pci_get_device(dev);
                cpi->hba_subvendor = pci_get_subvendor(dev);
                cpi->hba_subdevice = pci_get_subdevice(dev);
                cpi->ccb_h.status = CAM_REQ_CMP;
                break;
        }
        case XPT_GET_TRAN_SETTINGS:     /* Get transport settings */
        {
                struct ccb_trans_settings       *cts;
                struct ccb_trans_settings_nvme  *nvmep;
                struct ccb_trans_settings_nvme  *nvmex;
                device_t dev;
                uint32_t status, caps, flags;

                dev = ctrlr->dev;
                cts = &ccb->cts;
                nvmex = &cts->xport_specific.nvme;
                nvmep = &cts->proto_specific.nvme;

                nvmex->spec = nvme_mmio_read_4(ctrlr, vs);
                nvmex->valid = CTS_NVME_VALID_SPEC;
                if ((ctrlr->quirks & QUIRK_AHCI) == 0) {
                        /* AHCI redirect makes it impossible to query */
                        status = pcie_read_config(dev, PCIER_LINK_STA, 2);
                        caps = pcie_read_config(dev, PCIER_LINK_CAP, 2);
                        flags = pcie_read_config(dev, PCIER_FLAGS, 2);
                        /* Link data is only reported for PCIe endpoints. */
                        if ((flags & PCIEM_FLAGS_TYPE) == PCIEM_TYPE_ENDPOINT) {
                                nvmex->valid |= CTS_NVME_VALID_LINK;
                                nvmex->speed = status & PCIEM_LINK_STA_SPEED;
                                nvmex->lanes = (status & PCIEM_LINK_STA_WIDTH) >> 4;
                                nvmex->max_speed = caps & PCIEM_LINK_CAP_MAX_SPEED;
                                nvmex->max_lanes = (caps & PCIEM_LINK_CAP_MAX_WIDTH) >> 4;
                        }
                }

                /* XXX these should be something else maybe ? */
                nvmep->valid = CTS_NVME_VALID_SPEC;
                nvmep->spec = nvmex->spec;

                cts->transport = XPORT_NVME;
                cts->transport_version = nvmex->spec;
                cts->protocol = PROTO_NVME;
                cts->protocol_version = nvmex->spec;
                cts->ccb_h.status = CAM_REQ_CMP;
                break;
        }
        case XPT_TERM_IO:               /* Terminate the I/O process */
                /*
                 * Every driver handles this, but nothing generates it. Assume
                 * it's OK to just say 'that worked'.
                 */
                /*FALLTHROUGH*/
        case XPT_RESET_DEV:             /* Bus Device Reset the specified device */
        case XPT_RESET_BUS:             /* Reset the specified bus */
                /*
                 * NVMe doesn't really support physically resetting the bus. It's part
                 * of the bus scanning dance, so return success to tell the process to
                 * proceed.
                 */
                ccb->ccb_h.status = CAM_REQ_CMP;
                break;
        case XPT_NVME_IO:               /* Execute the requested I/O operation */
                if (ctrlr->is_failed) {
                        /*
                         * I/O came in while we were failing the drive, so drop
                         * it. Once failure is complete, we'll be destroyed.
                         */
                        ccb->ccb_h.status = CAM_DEV_NOT_THERE;
                        break;
                }
                nvme_sim_nvmeio(sim, ccb);
                return;                 /* no done */
        case XPT_NVME_ADMIN:            /* or Admin operation */
                if (ctrlr->is_failed_admin) {
                        /*
                         * Admin request came in when we can't send admin
                         * commands, so drop it. Once failure is complete, we'll
                         * be destroyed.
                         */
                        ccb->ccb_h.status = CAM_DEV_NOT_THERE;
                        break;
                }
                nvme_sim_nvmeio(sim, ccb);
                return;                 /* no done */
        default:
                /* Anything not listed above is unsupported by this SIM. */
                ccb->ccb_h.status = CAM_REQ_INVALID;
                break;
        }
        /* Common completion for every case that did not return early. */
        xpt_done(ccb);
}

/*
 * CAM poll entry point for this SIM: forwards to the controller's own poll
 * routine.
 */
static void
nvme_sim_poll(struct cam_sim *sim)
{
        struct nvme_controller *ctrlr = sim2ctrlr(sim);

        nvme_ctrlr_poll(ctrlr);
}

/*
 * Newbus probe method.
 *
 * Declines (ENXIO) when nvme_use_nvd is set, or when the parent device's PCI
 * programming interface is not NVMHCI 1.0.  Otherwise sets the description
 * and returns BUS_PROBE_DEFAULT.
 */
static int
nvme_sim_probe(device_t dev)
{
        device_t parent;

        if (nvme_use_nvd)
                return (ENXIO);

        /*
         * CAM is only used for storage devices.  NVMHCI 1.0 interfaces are
         * the only ones whose namespaces have LBA ranges on them.
         */
        parent = device_get_parent(dev);
        if (pci_get_progif(parent) != PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0)
                return (ENXIO);

        device_set_desc(dev, "nvme cam");
        return (BUS_PROBE_DEFAULT);
}

/*
 * Newbus attach method: create the CAM plumbing for one NVMe controller.
 *
 * Steps, in order: allocate a SIM queue sized to the controller's
 * max_hw_pend_io, allocate the SIM, register it as a CAM bus, create a
 * wildcard path on that bus, then announce every namespace whose nsze is
 * non-zero via nvme_sim_ns_added().
 *
 * Returns 0 on success, ENOMEM if the queue or SIM cannot be allocated, and
 * ENXIO if bus registration or path creation fails.  Everything acquired
 * before a failure is released again before returning.
 */
static int
nvme_sim_attach(device_t dev)
{
        struct nvme_sim_softc   *softc = device_get_softc(dev);
        struct nvme_controller  *ctrlr = device_get_ivars(dev);
        struct cam_devq         *simq;
        int                     depth = ctrlr->max_hw_pend_io;
        int                     idx;

        simq = cam_simq_alloc(depth);
        if (simq == NULL)
                return (ENOMEM);

        softc->s_ctrlr = ctrlr;

        softc->s_sim = cam_sim_alloc(nvme_sim_action, nvme_sim_poll,
            "nvme", softc, device_get_unit(ctrlr->dev),
            NULL, depth, depth, simq);
        if (softc->s_sim == NULL) {
                device_printf(dev, "Failed to allocate a sim\n");
                cam_simq_free(simq);
                return (ENOMEM);
        }

        if (xpt_bus_register(softc->s_sim, dev, 0) != CAM_SUCCESS) {
                device_printf(dev, "Failed to create a bus\n");
                /* Freeing the SIM with free_devq also releases the queue. */
                cam_sim_free(softc->s_sim, /*free_devq*/TRUE);
                return (ENXIO);
        }

        if (xpt_create_path(&softc->s_path, /*periph*/NULL,
            cam_sim_path(softc->s_sim), CAM_TARGET_WILDCARD,
            CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
                device_printf(dev, "Failed to create a path\n");
                xpt_bus_deregister(cam_sim_path(softc->s_sim));
                cam_sim_free(softc->s_sim, /*free_devq*/TRUE);
                return (ENXIO);
        }

        /*
         * Announce the namespaces that already exist.
         *
         * NOTE(review): nvme_sim_ns_added() returns early when
         * device_is_attached(dev) is false.  Confirm that check passes while
         * this attach method is still running; otherwise these initial scans
         * are silently skipped.
         */
        idx = 0;
        while (idx < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES)) {
                struct nvme_namespace *ns = &ctrlr->ns[idx++];

                if (ns->data.nsze != 0)
                        nvme_sim_ns_added(dev, ns);
        }

        return (0);
}


/*
 * Report every populated namespace (nsze != 0) of the controller as removed
 * by calling nvme_sim_ns_removed() on it.  Per-namespace failures are not
 * propagated; the function always returns 0.
 */
static int
nvme_sim_fail_all_ns(device_t dev)
{
        struct nvme_sim_softc   *softc = device_get_softc(dev);
        struct nvme_controller  *ctrlr = softc->s_ctrlr;
        int                     idx = 0;

        while (idx < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES)) {
                struct nvme_namespace *ns = &ctrlr->ns[idx++];

                if (ns->data.nsze != 0)
                        nvme_sim_ns_removed(dev, ns);
        }

        return (0);
}

/*
 * Newbus detach method.
 *
 * First reports every populated namespace as lost, then undoes what
 * nvme_sim_attach() set up: the wildcard path, the CAM bus registration and
 * the SIM itself (together with its device queue).  Without this teardown
 * the SIM would outlive the device while still pointing at the softc
 * handed to cam_sim_alloc() at attach time.
 *
 * Returns 0 on success, or the error from nvme_sim_fail_all_ns(), in which
 * case nothing is torn down.
 */
static int
nvme_sim_detach(device_t dev)
{
        struct nvme_sim_softc *sc = device_get_softc(dev);
        int error;

        error = nvme_sim_fail_all_ns(dev);
        if (error != 0)
                return (error);

        /* Release in the reverse order of acquisition in attach. */
        xpt_free_path(sc->s_path);
        sc->s_path = NULL;
        xpt_bus_deregister(cam_sim_path(sc->s_sim));
        cam_sim_free(sc->s_sim, /*free_devq*/TRUE);
        sc->s_sim = NULL;

        return (0);
}

/*
 * Handle a new (or changed) namespace by asking CAM to rescan the LUN that
 * corresponds to it.
 *
 * dev is the nvme_sim device and ns the namespace to scan; ns->id is used as
 * the LUN on target 0 of this SIM's bus.
 *
 * Returns 0 when the rescan was queued or when the device is not attached
 * (nothing to do), and ENOMEM when the path for the rescan cannot be
 * created.
 */
static int
nvme_sim_ns_added(device_t dev, struct nvme_namespace *ns)
{
        struct nvme_sim_softc *sc = device_get_softc(dev);
        union ccb *ccb;

        /*
         * If we have no namespaces, then we both do not attach the nvme_sim_ns
         * device. And then get a ns changed AER as well to tell us about it
         * (which is how we get here). If there's no device attached, then
         * there's nothing to do. sc->s_sim will be NULL as well (since it
         * only gets set when we attach).
         */
        if (!device_is_attached(dev))
                return (0);

        /*
         * We map the NVMe namespace idea onto the CAM unit LUN. For each new
         * namespace, scan or rescan the path to enumerate it.
         */
        ccb = xpt_alloc_ccb();
        if (xpt_create_path(&ccb->ccb_h.path, /*periph*/NULL,
            cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
                printf("unable to create path for rescan\n");
                /* The CCB was never handed to CAM, so release it here. */
                xpt_free_ccb(ccb);
                return (ENOMEM);
        }

        /* From here on the CCB belongs to the rescan; do not touch it. */
        xpt_rescan(ccb);

        return (0);
}

/*
 * Tell CAM that the device behind a namespace is gone.
 *
 * Builds a temporary path for target 0 / LUN ns->id on this SIM's bus, posts
 * AC_LOST_DEVICE on it and frees the path again.  Returns 0 on success or
 * ENOMEM when the path cannot be created.
 */
static int
nvme_sim_ns_removed(device_t dev, struct nvme_namespace *ns)
{
        struct nvme_sim_softc   *softc = device_get_softc(dev);
        struct cam_path         *ns_path;
        cam_status              rc;

        rc = xpt_create_path(&ns_path, /*periph*/NULL,
            cam_sim_path(softc->s_sim), 0, ns->id);
        if (rc != CAM_REQ_CMP) {
                printf("unable to create path for ns removal\n");
                return (ENOMEM);
        }

        xpt_async(AC_LOST_DEVICE, ns_path, NULL);
        xpt_free_path(ns_path);

        return (0);
}

/*
 * Handle a "namespace changed" notification for namespace ID nsid.
 *
 * nsid is 1-based and is used (minus one) as an index into the controller's
 * namespace array, so values outside 1..NVME_MAX_NAMESPACES are rejected
 * with EINVAL rather than indexing out of bounds.  Otherwise the result of
 * nvme_sim_ns_added() for that namespace is returned.
 */
static int
nvme_sim_ns_changed(device_t dev, uint32_t nsid)
{
        struct nvme_sim_softc *sc = device_get_softc(dev);
        struct nvme_namespace *ns;

        if (nsid == 0 || nsid > NVME_MAX_NAMESPACES)
                return (EINVAL);
        ns = &sc->s_ctrlr->ns[nsid - 1];

        /*
         * These wind up being the same. For a configured cam_ed, we generate
         * AC_GETDEV_CHANGED, but for new one we do AC_FOUND_DEVICE, but the
         * scan is the same.
         */
        return (nvme_sim_ns_added(dev, ns));
}

/*
 * Controller-failed notification: report all populated namespaces as lost
 * and return whatever nvme_sim_fail_all_ns() returns.
 */
static int
nvme_sim_controller_failed(device_t dev)
{
        int error;

        error = nvme_sim_fail_all_ns(dev);
        return (error);
}

/*
 * Asynchronous event notification hook.  This driver takes no action on
 * AENs: every argument is ignored and 0 is returned.
 */
static int
nvme_sim_handle_aen(device_t dev, const struct nvme_completion *cpl,
    uint32_t pg_nr, void *page, uint32_t page_len)
{
        (void)dev;
        (void)cpl;
        (void)pg_nr;
        (void)page;
        (void)page_len;

        return (0);
}

/*
 * Method table for the nvme_sim driver: the generic newbus device methods
 * plus the nvme_if notifications this driver implements.
 */
static device_method_t nvme_sim_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         nvme_sim_probe),
        DEVMETHOD(device_attach,        nvme_sim_attach),
        DEVMETHOD(device_detach,        nvme_sim_detach),
        /* Nvme controller messages */
        DEVMETHOD(nvme_ns_added,        nvme_sim_ns_added),
        DEVMETHOD(nvme_ns_removed,      nvme_sim_ns_removed),
        DEVMETHOD(nvme_ns_changed,      nvme_sim_ns_changed),
        DEVMETHOD(nvme_controller_failed, nvme_sim_controller_failed),
        DEVMETHOD(nvme_handle_aen,      nvme_sim_handle_aen),
        DEVMETHOD_END
};

/*
 * Driver description: name, method table, and the softc size (one
 * struct nvme_sim_softc per device).
 */
static driver_t nvme_sim_driver = {
        "nvme_sim",
        nvme_sim_methods,
        sizeof(struct nvme_sim_softc),
};

/*
 * Register nvme_sim_driver under the "nvme" parent, with no module event
 * handler or argument, and declare module version 1.
 */
DRIVER_MODULE(nvme_sim, nvme, nvme_sim_driver, NULL, NULL);
MODULE_VERSION(nvme_sim, 1);