root/usr/src/uts/common/io/sfxge/sfxge_mcdi.c
/*
 * Copyright (c) 2009-2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pci.h>

#include "sfxge.h"
#include "efsys.h"
#include "efx.h"
#include "efx_mcdi.h"
#include "efx_regs_mcdi.h"

/*
 * Device access attributes for the MCDI DMA buffer. Handed to
 * sfxge_dma_buffer_create() through sdba_devaccp in sfxge_mcdi_init().
 */
static ddi_device_acc_attr_t sfxge_mcdi_devacc = {

        DDI_DEVICE_ATTR_V0,     /* devacc_attr_version */
        DDI_NEVERSWAP_ACC,      /* devacc_attr_endian_flags */
        DDI_STRICTORDER_ACC     /* devacc_attr_dataorder */
};

/*
 * DMA attributes for the MCDI message buffer. Handed to
 * sfxge_dma_buffer_create() through sdba_dattrp in sfxge_mcdi_init().
 * The buffer may live anywhere in the 64-bit address space, is aligned to
 * 0x1000 bytes and must be described by a single scatter/gather entry.
 */
static ddi_dma_attr_t sfxge_mcdi_dma_attr = {
        DMA_ATTR_V0,            /* dma_attr_version     */
        0,                      /* dma_attr_addr_lo     */
        0xffffffffffffffffull,  /* dma_attr_addr_hi     */
        0xffffffffffffffffull,  /* dma_attr_count_max   */
        0x1000,                 /* dma_attr_align       */
        0xffffffff,             /* dma_attr_burstsizes  */
        1,                      /* dma_attr_minxfer     */
        0xffffffffffffffffull,  /* dma_attr_maxxfer     */
        0xffffffffffffffffull,  /* dma_attr_seg         */
        1,                      /* dma_attr_sgllen      */
        1,                      /* dma_attr_granular    */
        0                       /* dma_attr_flags       */
};

/*
 * Notes on MCDI operation:
 * ------------------------
 * MCDI requests can be made in arbitrary thread context, and as a synchronous
 * API must therefore block until the response is available from the MC, or
 * a watchdog timeout occurs.
 *
 * This interacts badly with the limited number of worker threads (2 per CPU)
 * used by the Solaris callout subsystem to invoke timeout handlers. If both
 * worker threads are blocked (e.g. waiting for a condvar or mutex) then timeout
 * processing is deadlocked on that CPU, causing system failure.
 *
 * For this reason the driver does not use event based MCDI completion, as this
 * leads to numerous paths involving timeouts and reentrant GLDv3 entrypoints
 * that result in a deadlocked system.
 */
/*
 * Polling parameters consumed by sfxge_mcdi_poll(): the busy-wait between
 * successive completion polls, and the total time allowed before the
 * outstanding request is aborted as timed out.
 */
#define SFXGE_MCDI_POLL_INTERVAL        10              /* 10us in 1us units */
#define SFXGE_MCDI_WATCHDOG_INTERVAL    10000000        /* 10s in 1us units */


/*
 * Claim the MCDI interface for one request.
 *
 * Sleeps on sm_kv until the state is SFXGE_MCDI_INITIALIZED (i.e. nobody
 * else owns the interface), then marks it SFXGE_MCDI_BUSY on behalf of the
 * caller. Ownership is handed back with sfxge_mcdi_release().
 */
static void
sfxge_mcdi_acquire(sfxge_mcdi_t *smp)
{
        mutex_enter(&smp->sm_lock);
        ASSERT3U(smp->sm_state, !=, SFXGE_MCDI_UNINITIALIZED);

        /* Re-test the state after every wakeup; another waiter may win */
        for (;;) {
                if (smp->sm_state == SFXGE_MCDI_INITIALIZED)
                        break;

                (void) cv_wait(&smp->sm_kv, &smp->sm_lock);
        }

        smp->sm_state = SFXGE_MCDI_BUSY;
        mutex_exit(&smp->sm_lock);
}


/*
 * Give up the MCDI interface once a request has finished.
 *
 * Returns the state to SFXGE_MCDI_INITIALIZED and wakes every thread
 * sleeping on sm_kv so that one of them can take ownership in
 * sfxge_mcdi_acquire().
 */
static void
sfxge_mcdi_release(sfxge_mcdi_t *smp)
{
        mutex_enter(&smp->sm_lock);

        /* Only the current owner may release */
        ASSERT((smp->sm_state == SFXGE_MCDI_BUSY) ||
            (smp->sm_state == SFXGE_MCDI_COMPLETED));
        smp->sm_state = SFXGE_MCDI_INITIALIZED;

        cv_broadcast(&smp->sm_kv);
        mutex_exit(&smp->sm_lock);
}


/*
 * Report an MCDI watchdog timeout: log a warning against the device, fire
 * the mcdi_timeout DTrace probe and request a restart flagged as a hardware
 * error. The result of the restart dispatch is deliberately ignored.
 */
static void
sfxge_mcdi_timeout(sfxge_t *sp)
{
        dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR "MC_TIMEOUT");
        DTRACE_PROBE(mcdi_timeout);

        (void) sfxge_restart_dispatch(sp, DDI_SLEEP, SFXGE_HW_ERR,
            "MCDI timeout", 0);
}


/*
 * Busy-poll the outstanding MCDI request until efx_mcdi_request_poll()
 * reports it finished, or until SFXGE_MCDI_WATCHDOG_INTERVAL has elapsed.
 *
 * On timeout the request is aborted and sfxge_mcdi_timeout() is invoked to
 * log the event and schedule a restart.
 */
static void
sfxge_mcdi_poll(sfxge_t *sp)
{
        efx_nic_t *enp = sp->s_enp;
        clock_t deadline;
        boolean_t aborted;

        deadline = ddi_get_lbolt() + drv_usectohz(SFXGE_MCDI_WATCHDOG_INTERVAL);

        for (;;) {
                /* Request completed (or polling failed) */
                if (efx_mcdi_request_poll(enp) != B_FALSE)
                        return;

                /* No response yet; give up once the watchdog has expired */
                if (ddi_get_lbolt() > deadline)
                        break;

                /* Short delay to avoid excessive PCIe traffic */
                drv_usecwait(SFXGE_MCDI_POLL_INTERVAL);
        }

        /* Timeout before request completion */
        DTRACE_PROBE(fail);
        aborted = efx_mcdi_request_abort(enp);
        ASSERT(aborted);
        sfxge_mcdi_timeout(sp);
}


/*
 * emt_execute transport callback: run one MCDI request synchronously.
 *
 * arg is the sfxge_t registered as emt_context. The interface is held
 * exclusively from before the request is started until polling has finished
 * (by completion or by timeout).
 */
static void
sfxge_mcdi_execute(void *arg, efx_mcdi_req_t *emrp)
{
        sfxge_t *sp = arg;
        sfxge_mcdi_t *smp = &sp->s_mcdi;

        sfxge_mcdi_acquire(smp);

        /* Start the request with B_FALSE as the final argument, then poll */
        efx_mcdi_request_start(sp->s_enp, emrp, B_FALSE);
        sfxge_mcdi_poll(sp);

        sfxge_mcdi_release(smp);
}


/*
 * emt_ev_cpl transport callback: note that the request in flight has
 * completed. Moves the state from SFXGE_MCDI_BUSY to SFXGE_MCDI_COMPLETED
 * under sm_lock and wakes all threads sleeping on sm_kv.
 */
static void
sfxge_mcdi_ev_cpl(void *arg)
{
        sfxge_t *sp = arg;
        sfxge_mcdi_t *smp = &sp->s_mcdi;

        mutex_enter(&smp->sm_lock);

        ASSERT(smp->sm_state == SFXGE_MCDI_BUSY);
        smp->sm_state = SFXGE_MCDI_COMPLETED;

        cv_broadcast(&smp->sm_kv);
        mutex_exit(&smp->sm_lock);
}


/*
 * emt_exception transport callback: translate the exception code into a
 * reason string and request a restart flagged as a hardware error. The
 * result of the restart dispatch is deliberately ignored.
 */
static void
sfxge_mcdi_exception(void *arg, efx_mcdi_exception_t eme)
{
        sfxge_t *sp = arg;
        const char *reason;

        switch (eme) {
        case EFX_MCDI_EXCEPTION_MC_REBOOT:
                reason = "MC_REBOOT";
                break;

        case EFX_MCDI_EXCEPTION_MC_BADASSERT:
                reason = "MC_BADASSERT";
                break;

        default:
                /* Any exception code not recognised above */
                reason = "MC_UNKNOWN";
                break;
        }

        DTRACE_PROBE(mcdi_exception);

        /* sfxge_evq_t->se_lock held */
        (void) sfxge_restart_dispatch(sp, DDI_SLEEP, SFXGE_HW_ERR, reason, 0);
}

#if EFSYS_OPT_MCDI_LOGGING
#define SFXGE_MCDI_LOG_BUF_SIZE 128

/*
 * Append a run of 32-bit words to a log line as " %08x" fields.
 *
 * buffer    - line buffer of SFXGE_MCDI_LOG_BUF_SIZE bytes whose first
 *             pfxsize characters hold the line prefix
 * data      - words to format; consumed in whole uint32_t units
 * data_size - number of bytes to format
 * pfxsize   - length of the prefix to keep when starting a new line
 * position  - offset in buffer at which to continue writing
 *
 * When the next word would not fit, the line built so far is emitted with a
 * trailing " \" continuation marker and writing restarts directly after the
 * prefix. Returns the offset following the last word written; the final,
 * partially filled line is left for the caller to emit.
 */
static size_t
sfxge_mcdi_do_log(char *buffer, void *data, size_t data_size,
    size_t pfxsize, size_t position)
{
        /* Characters per formatted word: a space plus eight hex digits */
        const size_t field_len = 2 * sizeof (uint32_t) + 1;
        uint32_t *wordp = data;
        size_t done = 0;

        while (done < data_size) {
                if (position + field_len >= SFXGE_MCDI_LOG_BUF_SIZE) {
                        /* Line full: flush it and start a continuation */
                        buffer[position] = '\0';
                        cmn_err(CE_NOTE, "%s \\", buffer);
                        position = pfxsize;
                }

                snprintf(buffer + position, SFXGE_MCDI_LOG_BUF_SIZE - position,
                    " %08x", *wordp);
                wordp++;

                position += field_len;
                done += sizeof (*wordp);
        }

        return (position);
}


/*
 * emt_logger transport callback: dump an MCDI request or response to the
 * system log as hex words, provided s_mcdi_logging is set for this instance.
 *
 * Each line starts with a prefix naming the PCI bus/device/function, the
 * driver instance and the direction ("REQ", "RESP", or "???" for any other
 * message type). The header words are followed by the payload words; long
 * messages are split over several lines by sfxge_mcdi_do_log().
 */
static void
sfxge_mcdi_logger(void *arg, efx_log_msg_t type,
    void *header, size_t header_size, void *data, size_t data_size)
{
        sfxge_t *sp = arg;
        char buffer[SFXGE_MCDI_LOG_BUF_SIZE];
        const char *direction;
        size_t pfxsize;
        size_t position;

        if (!sp->s_mcdi_logging)
                return;

        if (type == EFX_LOG_MCDI_REQUEST)
                direction = "REQ";
        else if (type == EFX_LOG_MCDI_RESPONSE)
                direction = "RESP";
        else
                direction = "???";

        pfxsize = snprintf(buffer, sizeof (buffer),
            "sfc %04x:%02x:%02x.%02x %s%d MCDI RPC %s:",
            0,
            PCI_REG_BUS_G(sp->s_bus_addr),
            PCI_REG_DEV_G(sp->s_bus_addr),
            PCI_REG_FUNC_G(sp->s_bus_addr),
            ddi_driver_name(sp->s_dip),
            ddi_get_instance(sp->s_dip),
            direction);

        /* Header words first, then the payload continuing on the same line */
        position = sfxge_mcdi_do_log(buffer, header, header_size,
            pfxsize, pfxsize);
        position = sfxge_mcdi_do_log(buffer, data, data_size, pfxsize,
            position);

        /* Nothing beyond the prefix is pending: no final line to emit */
        if (position == pfxsize)
                return;

        buffer[position] = '\0';
        cmn_err(CE_NOTE, "%s", buffer);
}
#endif /* EFSYS_OPT_MCDI_LOGGING */

/*
 * Set up the MCDI transport for a NIC instance.
 *
 * Allocates the zero-filled DMA buffer used for MCDI messages, initialises
 * the lock and condition variable that serialise requests, fills in the
 * transport descriptor (sm_emt) with this file's callbacks and registers it
 * with the common code through efx_mcdi_init().
 *
 * Returns 0 on success. Otherwise returns the error from
 * sfxge_dma_buffer_create() or efx_mcdi_init(), with everything that was set
 * up here torn down again.
 */
int
sfxge_mcdi_init(sfxge_t *sp)
{
        efx_nic_t *enp = sp->s_enp;
        sfxge_mcdi_t *smp = &(sp->s_mcdi);
        efsys_mem_t *esmp = &(smp->sm_mem);
        efx_mcdi_transport_t *emtp = &(smp->sm_emt);
        sfxge_dma_buffer_attr_t dma_attr;
        int msg_buf_size;
        int rc;

        ASSERT3U(smp->sm_state, ==, SFXGE_MCDI_UNINITIALIZED);

        /* One 32-bit word plus the largest MCDI v2 SDU */
        msg_buf_size = sizeof (uint32_t) + MCDI_CTL_SDU_LEN_MAX_V2;

        /* Allocate host DMA buffer for MCDI commands */
        dma_attr.sdba_dip        = sp->s_dip;
        dma_attr.sdba_dattrp     = &sfxge_mcdi_dma_attr;
        dma_attr.sdba_callback   = DDI_DMA_SLEEP;
        dma_attr.sdba_length     = msg_buf_size;
        dma_attr.sdba_memflags   = DDI_DMA_CONSISTENT;
        dma_attr.sdba_devaccp    = &sfxge_mcdi_devacc;
        dma_attr.sdba_bindflags  = DDI_DMA_RDWR | DDI_DMA_CONSISTENT;
        dma_attr.sdba_maxcookies = 1;
        dma_attr.sdba_zeroinit   = B_TRUE;

        if ((rc = sfxge_dma_buffer_create(esmp, &dma_attr)) != 0)
                goto fail1;

        mutex_init(&(smp->sm_lock), NULL, MUTEX_DRIVER, NULL);

        smp->sm_state = SFXGE_MCDI_INITIALIZED;

        emtp->emt_context   = sp;
        emtp->emt_dma_mem   = esmp;
        emtp->emt_execute   = sfxge_mcdi_execute;
        emtp->emt_ev_cpl    = sfxge_mcdi_ev_cpl;
        emtp->emt_exception = sfxge_mcdi_exception;
#if EFSYS_OPT_MCDI_LOGGING
        emtp->emt_logger    = sfxge_mcdi_logger;
#endif

        cv_init(&(smp->sm_kv), NULL, CV_DRIVER, NULL);

        if ((rc = efx_mcdi_init(enp, emtp)) != 0)
                goto fail2;

        return (0);

fail2:
        DTRACE_PROBE(fail2);

        cv_destroy(&(smp->sm_kv));
        mutex_destroy(&(smp->sm_lock));

        sfxge_dma_buffer_destroy(esmp);

        /*
         * Clear the transport descriptor filled in above, exactly as
         * sfxge_mcdi_fini() does, so that no stale callbacks or context
         * pointer are left in the object handed to SFXGE_OBJ_CHECK() below.
         */
        bzero(emtp, sizeof (*emtp));

        smp->sm_state = SFXGE_MCDI_UNINITIALIZED;
        smp->sm_sp = NULL;
        SFXGE_OBJ_CHECK(smp, sfxge_mcdi_t);

fail1:
        DTRACE_PROBE1(fail1, int, rc);

        return (rc);
}


/*
 * Tear down the MCDI transport set up by sfxge_mcdi_init().
 *
 * The interface must be idle (SFXGE_MCDI_INITIALIZED). With sm_lock held the
 * common code is detached via efx_mcdi_fini(), the transport descriptor is
 * cleared and the condition variable destroyed; the DMA buffer and the lock
 * itself are released afterwards, leaving the object in the
 * SFXGE_MCDI_UNINITIALIZED state.
 */
void
sfxge_mcdi_fini(sfxge_t *sp)
{
        sfxge_mcdi_t *smp = &sp->s_mcdi;
        efx_mcdi_transport_t *emtp = &smp->sm_emt;

        mutex_enter(&smp->sm_lock);
        ASSERT3U(smp->sm_state, ==, SFXGE_MCDI_INITIALIZED);

        /* Detach the common code before wiping the callbacks it was given */
        efx_mcdi_fini(sp->s_enp);
        bzero(emtp, sizeof (*emtp));

        smp->sm_sp = NULL;

        cv_destroy(&smp->sm_kv);
        mutex_exit(&smp->sm_lock);

        sfxge_dma_buffer_destroy(&smp->sm_mem);
        mutex_destroy(&smp->sm_lock);

        smp->sm_state = SFXGE_MCDI_UNINITIALIZED;
        SFXGE_OBJ_CHECK(smp, sfxge_mcdi_t);
}


/*
 * MCDI pass-through ioctl (results narrowed to 8 bits).
 *
 * Issues the command described by smip (smi_cmd, with smi_len bytes of
 * smi_payload as input) to the MC and writes the outcome back into smip:
 * smi_rc receives the request's return code, smi_len the response length and
 * smi_payload the response data. An MC_CMD_REBOOT command additionally
 * schedules a driver restart.
 *
 * Returns 0 once the request has been executed (the MC's own result is in
 * smi_rc), ENODEV if MCDI has not been initialised, ENOTSUP if the NIC lacks
 * EFX_FEATURE_MCDI, EINVAL if smi_len exceeds the payload buffer, or ENOMEM
 * if the response buffer cannot be allocated.
 */
int
sfxge_mcdi_ioctl(sfxge_t *sp, sfxge_mcdi_ioc_t *smip)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sp->s_enp);
        sfxge_mcdi_t *smp = &(sp->s_mcdi);
        efx_mcdi_req_t emr;
        size_t in_len = smip->smi_len;
        size_t out_len;
        uint8_t *out;
        int rc;

        if (smp->sm_state == SFXGE_MCDI_UNINITIALIZED) {
                rc = ENODEV;
                goto fail1;
        }

        if (!(encp->enc_features & EFX_FEATURE_MCDI)) {
                rc = ENOTSUP;
                goto fail2;
        }

        /*
         * The length comes from the ioctl caller and is passed on together
         * with smi_payload as the input buffer, so it must not describe more
         * data than that buffer holds.
         */
        if (in_len > sizeof (smip->smi_payload)) {
                rc = EINVAL;
                goto fail3;
        }

        out = kmem_zalloc(sizeof (smip->smi_payload), KM_NOSLEEP);
        if (out == NULL) {
                rc = ENOMEM;
                goto fail4;
        }

        /* Start from a zeroed request so no field is left indeterminate */
        bzero(&emr, sizeof (emr));

        emr.emr_cmd = smip->smi_cmd;
        emr.emr_in_buf = smip->smi_payload;
        emr.emr_in_length = in_len;

        emr.emr_out_buf = out;
        emr.emr_out_length = sizeof (smip->smi_payload);

        sfxge_mcdi_execute(sp, &emr);

        /* Never copy back more than the response buffer can hold */
        out_len = emr.emr_out_length_used;
        if (out_len > sizeof (smip->smi_payload))
                out_len = sizeof (smip->smi_payload);

        smip->smi_rc = (uint8_t)emr.emr_rc;
        smip->smi_cmd = (uint8_t)emr.emr_cmd;
        smip->smi_len = (uint8_t)out_len;
        bcopy(out, smip->smi_payload, smip->smi_len);

        /*
         * Helpfully trigger a device reset in response to an MCDI_CMD_REBOOT
         * Both ports will see ->emt_exception callbacks on the next MCDI poll
         */
        if (smip->smi_cmd == MC_CMD_REBOOT) {

                DTRACE_PROBE(mcdi_ioctl_mc_reboot);
                /* sfxge_t->s_state_lock held */
                (void) sfxge_restart_dispatch(sp, DDI_SLEEP, SFXGE_HW_OK,
                    "MC_REBOOT triggering restart", 0);
        }

        kmem_free(out, sizeof (smip->smi_payload));

        return (0);

fail4:
        DTRACE_PROBE(fail4);
fail3:
        DTRACE_PROBE(fail3);
fail2:
        DTRACE_PROBE(fail2);
fail1:
        DTRACE_PROBE1(fail1, int, rc);
        return (rc);
}

/*
 * MCDI pass-through ioctl, second variant (response length reported as a
 * 32-bit value).
 *
 * Issues the command described by smip (smi_cmd, with smi_len bytes of
 * smi_payload as input) to the MC and writes the outcome back into smip:
 * smi_rc receives the request's return code, smi_len the response length and
 * smi_payload the response data. An MC_CMD_REBOOT command additionally
 * schedules a driver restart.
 *
 * Returns 0 once the request has been executed (the MC's own result is in
 * smi_rc), ENODEV if MCDI has not been initialised, ENOTSUP if the NIC lacks
 * EFX_FEATURE_MCDI, EINVAL if smi_len exceeds the payload buffer, or ENOMEM
 * if the response buffer cannot be allocated.
 */
int
sfxge_mcdi2_ioctl(sfxge_t *sp, sfxge_mcdi2_ioc_t *smip)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sp->s_enp);
        sfxge_mcdi_t *smp = &(sp->s_mcdi);
        efx_mcdi_req_t emr;
        size_t in_len = smip->smi_len;
        size_t out_len;
        uint8_t *out;
        int rc;

        if (smp->sm_state == SFXGE_MCDI_UNINITIALIZED) {
                rc = ENODEV;
                goto fail1;
        }

        if (!(encp->enc_features & EFX_FEATURE_MCDI)) {
                rc = ENOTSUP;
                goto fail2;
        }

        /*
         * The length comes from the ioctl caller and is passed on together
         * with smi_payload as the input buffer, so it must not describe more
         * data than that buffer holds.
         */
        if (in_len > sizeof (smip->smi_payload)) {
                rc = EINVAL;
                goto fail3;
        }

        out = kmem_zalloc(sizeof (smip->smi_payload), KM_NOSLEEP);
        if (out == NULL) {
                rc = ENOMEM;
                goto fail4;
        }

        /* Start from a zeroed request so no field is left indeterminate */
        bzero(&emr, sizeof (emr));

        emr.emr_cmd = smip->smi_cmd;
        emr.emr_in_buf = smip->smi_payload;
        emr.emr_in_length = in_len;

        emr.emr_out_buf = out;
        emr.emr_out_length = sizeof (smip->smi_payload);

        sfxge_mcdi_execute(sp, &emr);

        /* Never copy back more than the response buffer can hold */
        out_len = emr.emr_out_length_used;
        if (out_len > sizeof (smip->smi_payload))
                out_len = sizeof (smip->smi_payload);

        smip->smi_rc = emr.emr_rc;
        smip->smi_cmd = emr.emr_cmd;
        smip->smi_len = (uint32_t)out_len;
        bcopy(out, smip->smi_payload, smip->smi_len);

        /*
         * Helpfully trigger a device reset in response to an MCDI_CMD_REBOOT
         * Both ports will see ->emt_exception callbacks on the next MCDI poll
         */
        if (smip->smi_cmd == MC_CMD_REBOOT) {

                DTRACE_PROBE(mcdi_ioctl_mc_reboot);
                /* sfxge_t->s_state_lock held */
                (void) sfxge_restart_dispatch(sp, DDI_SLEEP, SFXGE_HW_OK,
                    "MC_REBOOT triggering restart", 0);
        }

        kmem_free(out, sizeof (smip->smi_payload));

        return (0);

fail4:
        DTRACE_PROBE(fail4);
fail3:
        DTRACE_PROBE(fail3);
fail2:
        DTRACE_PROBE(fail2);
fail1:
        DTRACE_PROBE1(fail1, int, rc);
        return (rc);
}