/* sys/dev/dpaa2/dpaa2_swp.c */
/*-
 * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause
 *
 * Copyright © 2014-2016 Freescale Semiconductor, Inc.
 * Copyright © 2016-2019 NXP
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Original source file obtained from:
 * drivers/soc/fsl/dpio/qbman-portal.c
 *
 * Commit: 4c86114194e644b6da9107d75910635c9e87179e
 * Repository: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
 */

/*
 * Copyright © 2021-2022 Dmitry Salychev
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 * DPAA2 QBMan software portal.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/lock.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/atomic.h>

#include "pcib_if.h"
#include "pci_if.h"

#include "dpaa2_swp.h"
#include "dpaa2_mc.h"
#include "dpaa2_bp.h"

#define CMD_SPIN_TIMEOUT                100u    /* us */
#define CMD_SPIN_ATTEMPTS               2000u   /* 200 ms max. */

#define CMD_VERB_MASK                   0x7Fu

/* Shifts in the VERB byte of the enqueue command descriptor. */
#define ENQ_CMD_ORP_ENABLE_SHIFT        2
#define ENQ_CMD_IRQ_ON_DISPATCH_SHIFT   3
#define ENQ_CMD_TARGET_TYPE_SHIFT       4
#define ENQ_CMD_DCA_EN_SHIFT            7
/* VERB byte options of the enqueue command descriptor. */
#define ENQ_CMD_EMPTY                   0u
#define ENQ_CMD_RESPONSE_ALWAYS         1u
#define ENQ_CMD_REJECTS_TO_FQ           2u

#define ENQ_DESC_FD_OFFSET              32u

#define ENQ_DCA_IDXMASK                 0x0Fu
#define ENQ_FLAG_DCA                    (1ull << 31)

/* QBMan portal command codes. */
#define CMDID_SWP_MC_ACQUIRE            0x30
#define CMDID_SWP_BP_QUERY              0x32
#define CMDID_SWP_WQCHAN_CONFIGURE      0x46

/* QBMan portal command result codes. */
#define QBMAN_CMD_RC_OK                 0xF0

/* SDQCR attribute codes */
#define QB_SDQCR_FC_SHIFT               29u
#define QB_SDQCR_FC_MASK                0x1u
#define QB_SDQCR_DCT_SHIFT              24u
#define QB_SDQCR_DCT_MASK               0x3u
#define QB_SDQCR_TOK_SHIFT              16u
#define QB_SDQCR_TOK_MASK               0xFFu
#define QB_SDQCR_SRC_SHIFT              0u
#define QB_SDQCR_SRC_MASK               0xFFFFu

/* Shifts in the VERB byte of the volatile dequeue command. */
#define QB_VDQCR_VERB_DCT0_SHIFT        0
#define QB_VDQCR_VERB_DCT1_SHIFT        1
#define QB_VDQCR_VERB_DT0_SHIFT         2
#define QB_VDQCR_VERB_DT1_SHIFT         3
#define QB_VDQCR_VERB_RLS_SHIFT         4
#define QB_VDQCR_VERB_WAE_SHIFT         5
#define QB_VDQCR_VERB_RAD_SHIFT         6

/* Maximum timeout period for the DQRR interrupt. */
#define DQRR_MAX_ITP                    4096u
#define DQRR_PI_MASK                    0x0Fu

/* Release Array Allocation register helpers. */
#define RAR_IDX(rar)                    ((rar) & 0x7u)
#define RAR_VB(rar)                     ((rar) & 0x80u)
#define RAR_SUCCESS(rar)                ((rar) & 0x100u)

MALLOC_DEFINE(M_DPAA2_SWP, "dpaa2_swp", "DPAA2 QBMan Software Portal");

/* Dequeue Command Type (DCT) field values for the SDQCR register. */
enum qbman_sdqcr_dct {
        qbman_sdqcr_dct_null = 0,       /* dequeuing disabled */
        qbman_sdqcr_dct_prio_ics,       /* priority, intra-class scheduling */
        qbman_sdqcr_dct_active_ics,     /* active FQs, intra-class scheduling */
        qbman_sdqcr_dct_active          /* active FQs */
};

/* Frame Count (FC) field values for the SDQCR register. */
enum qbman_sdqcr_fc {
        qbman_sdqcr_fc_one = 0,         /* dequeue one frame at a time */
        qbman_sdqcr_fc_up_to_3 = 1      /* dequeue up to three frames at once */
};

/* Routines to execute software portal commands. */
static int dpaa2_swp_exec_mgmt_command(struct dpaa2_swp *,
    struct dpaa2_swp_cmd *, struct dpaa2_swp_rsp *, uint8_t);
static int dpaa2_swp_exec_br_command(struct dpaa2_swp *, struct dpaa2_swp_cmd *,
    uint32_t);
static int dpaa2_swp_exec_vdc_command_locked(struct dpaa2_swp *,
    struct dpaa2_swp_cmd *);

/* Management Commands helpers. */
static int dpaa2_swp_send_mgmt_command(struct dpaa2_swp *,
    struct dpaa2_swp_cmd *, uint8_t);
static int dpaa2_swp_wait_for_mgmt_response(struct dpaa2_swp *,
    struct dpaa2_swp_rsp *);

/* Helper subroutines. */
static int dpaa2_swp_cyc_diff(uint8_t, uint8_t, uint8_t);

/**
 * @brief Allocate and configure a QBMan software portal.
 *
 * swp:   Out-pointer that receives the initialized portal on success.
 * desc:  Portal descriptor (resources, maps, HW revision, clock ratio).
 * flags: DPAA2_SWP_* allocation/behavior flags.
 *
 * Returns 0 or one of the DPAA2_SWP_STAT_* codes on failure.
 */
int
dpaa2_swp_init_portal(struct dpaa2_swp **swp, struct dpaa2_swp_desc *desc,
    uint16_t flags)
{
        struct dpaa2_swp *p;
        uint32_t reg, mask_size, eqcr_pi; /* EQCR producer index */

        if (!swp || !desc)
                return (DPAA2_SWP_STAT_EINVAL);

        p = malloc(sizeof(struct dpaa2_swp), M_DPAA2_SWP,
            flags & DPAA2_SWP_NOWAIT_ALLOC
            ? (M_NOWAIT | M_ZERO)
            : (M_WAITOK | M_ZERO));
        if (!p)
                return (DPAA2_SWP_STAT_NO_MEMORY);

        mtx_init(&p->lock, "swp_sleep_lock", NULL, MTX_DEF);

        p->cfg.mem_backed = false;
        p->cfg.writes_cinh = true;

        p->desc = desc;
        p->flags = flags;
        p->mc.valid_bit = DPAA2_SWP_VALID_BIT;
        p->mr.valid_bit = DPAA2_SWP_VALID_BIT;

        /* FIXME: Memory-backed mode doesn't work now. Why? */
        p->cena_res = desc->cena_res;
        p->cena_map = desc->cena_map;
        p->cinh_res = desc->cinh_res;
        p->cinh_map = desc->cinh_map;

        /* Static Dequeue Command Register configuration. */
        p->sdq = 0;
        p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
        p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
        p->sdq |= DPAA2_SWP_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;

        /* Volatile Dequeue Command configuration. */
        p->vdq.valid_bit = DPAA2_SWP_VALID_BIT;

        /* Dequeue Response Ring configuration */
        p->dqrr.next_idx = 0;
        p->dqrr.valid_bit = DPAA2_SWP_VALID_BIT;
        if ((desc->swp_version & DPAA2_SWP_REV_MASK) < DPAA2_SWP_REV_4100) {
                p->dqrr.ring_size = 4;
                p->dqrr.reset_bug = 1;
        } else {
                p->dqrr.ring_size = 8;
                p->dqrr.reset_bug = 0;
        }

        if ((desc->swp_version & DPAA2_SWP_REV_MASK) < DPAA2_SWP_REV_5000) {
                reg = dpaa2_swp_set_cfg(
                    p->dqrr.ring_size, /* max. entries QMan writes to DQRR */
                    1, /* writes enabled in the CINH memory only */
                    0, /* EQCR_CI stashing threshold */
                    3, /* RPM: RCR in array mode */
                    2, /* DCM: Discrete consumption ack */
                    2, /* EPM: EQCR in ring mode (FIFO) */
                    1, /* mem stashing drop enable enable */
                    1, /* mem stashing priority enable */
                    1, /* mem stashing enable */
                    1, /* dequeue stashing priority enable */
                    0, /* dequeue stashing enable enable */
                    0  /* EQCR_CI stashing priority enable */
                );
                reg &= ~(1 << DPAA2_SWP_CFG_CPBS_SHIFT); /* QMan-backed mode */
        } else {
                bus_set_region_4(p->cena_map, 0, 0,
                    rman_get_size(p->cena_res) / 4);

                reg = dpaa2_swp_set_cfg(
                    p->dqrr.ring_size, /* max. entries QMan writes to DQRR */                                   /* DQRR_MF */
                    1, /* writes enabled in the CINH memory only */                                             /* WN */
                    0, /* EQCR_CI stashing is disabled */                                                       /* EST */
                    3, /* RPM: RCR in array mode */                                                             /* RPM */
                    2, /* DCM: Discrete consumption ack */                                                      /* DCM */
                    2, /* EPM: EQCR in ring mode (FIFO) */                                                      /* EPM */
                    1, /* Dequeued frame data, annotation, and FQ context stashing drop enable */               /* SD */
                    1, /* Dequeued frame data, annotation, and FQ context stashing priority */                  /* SP */
                    1, /* Dequeued frame data, annotation, and FQ context stashing enable */                    /* SE */
                    1, /* Dequeue response ring (DQRR) entry stashing priority */                               /* DP */
                    0, /* Dequeue response ring (DQRR) entry, or cacheable portal area, stashing enable. */     /* DE */
                    0  /* EQCR_CI stashing priority */                                                          /* EP */
                );
                /* TODO: Switch to memory-backed mode. */
                reg &= ~(1 << DPAA2_SWP_CFG_CPBS_SHIFT); /* QMan-backed mode */
        }
        dpaa2_swp_write_reg(p, DPAA2_SWP_CINH_CFG, reg);
        reg = dpaa2_swp_read_reg(p, DPAA2_SWP_CINH_CFG);
        if (!reg) {
                /* Destroy the mutex initialized above before freeing. */
                mtx_destroy(&p->lock);
                free(p, M_DPAA2_SWP);
                return (DPAA2_SWP_STAT_PORTAL_DISABLED);
        }

        /*
         * Static Dequeue Command Register needs to be initialized to 0 when no
         * channels are being dequeued from or else the QMan HW will indicate an
         * error. The values that were calculated above will be applied when
         * dequeues from a specific channel are enabled.
         */
        dpaa2_swp_write_reg(p, DPAA2_SWP_CINH_SDQCR, 0);

        p->eqcr.pi_ring_size = 8;
        /* if ((desc->swp_version & DPAA2_SWP_REV_MASK) >= DPAA2_SWP_REV_5000) */
        /*      p->eqcr.pi_ring_size = 32; */

        /* Build a mask wide enough for the ring index plus the wrap bit. */
        for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
                p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1;

        eqcr_pi = dpaa2_swp_read_reg(p, DPAA2_SWP_CINH_EQCR_PI);
        p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask;
        p->eqcr.pi_vb = eqcr_pi & DPAA2_SWP_VALID_BIT;
        p->eqcr.ci = dpaa2_swp_read_reg(p, DPAA2_SWP_CINH_EQCR_CI)
            & p->eqcr.pi_ci_mask;
        p->eqcr.available = p->eqcr.pi_ring_size;

        /* TODO: sysctl(9) for the IRQ timeout? */
        /* Initialize the portal with an IRQ threshold and timeout of 120us. */
        dpaa2_swp_set_irq_coalescing(p, p->dqrr.ring_size - 1, 120);

        *swp = p;

        return (0);
}

/**
 * @brief Mark a software portal destroyed and release its resources.
 *
 * swp: Portal previously created by dpaa2_swp_init_portal(); must not be NULL.
 */
void
dpaa2_swp_free_portal(struct dpaa2_swp *swp)
{
        uint16_t flags;

        KASSERT(swp != NULL, ("%s: swp is NULL", __func__));

        /* Set the destroyed flag under the portal lock so that concurrent
         * users observe it before teardown proceeds. */
        DPAA2_SWP_LOCK(swp, &flags);
        swp->flags |= DPAA2_SWP_DESTROYED;
        DPAA2_SWP_UNLOCK(swp);

        /* Let threads stop using this portal. */
        DELAY(DPAA2_SWP_TIMEOUT);

        mtx_destroy(&swp->lock);
        free(swp, M_DPAA2_SWP);
}

/**
 * @brief Compose the value for the software portal configuration register
 * from the individual bit-fields.
 */
uint32_t
dpaa2_swp_set_cfg(uint8_t max_fill, uint8_t wn, uint8_t est, uint8_t rpm,
    uint8_t dcm, uint8_t epm, int sd, int sp, int se, int dp, int de, int ep)
{
        uint32_t cfg = 0;

        cfg |= max_fill << DPAA2_SWP_CFG_DQRR_MF_SHIFT;
        cfg |= est      << DPAA2_SWP_CFG_EST_SHIFT;
        cfg |= wn       << DPAA2_SWP_CFG_WN_SHIFT;
        cfg |= rpm      << DPAA2_SWP_CFG_RPM_SHIFT;
        cfg |= dcm      << DPAA2_SWP_CFG_DCM_SHIFT;
        cfg |= epm      << DPAA2_SWP_CFG_EPM_SHIFT;
        cfg |= sd       << DPAA2_SWP_CFG_SD_SHIFT;
        cfg |= sp       << DPAA2_SWP_CFG_SP_SHIFT;
        cfg |= se       << DPAA2_SWP_CFG_SE_SHIFT;
        cfg |= dp       << DPAA2_SWP_CFG_DP_SHIFT;
        cfg |= de       << DPAA2_SWP_CFG_DE_SHIFT;
        cfg |= ep       << DPAA2_SWP_CFG_EP_SHIFT;

        return (cfg);
}

/* Read/write registers of a software portal. */

/**
 * @brief Write a 32-bit value to a portal register in the cache-inhibited
 * (CINH) area.
 *
 * o: Register offset within the CINH mapping.
 * v: Value to write.
 */
void
dpaa2_swp_write_reg(struct dpaa2_swp *swp, uint32_t o, uint32_t v)
{
        bus_write_4(swp->cinh_map, o, v);
}

/**
 * @brief Read a 32-bit portal register from the cache-inhibited (CINH) area.
 *
 * o: Register offset within the CINH mapping.
 */
uint32_t
dpaa2_swp_read_reg(struct dpaa2_swp *swp, uint32_t o)
{
        return (bus_read_4(swp->cinh_map, o));
}

/* Helper routines. */

/**
 * @brief Set enqueue descriptor without Order Point Record ID.
 *
 * ed:          Enqueue descriptor.
 * resp_always: Enqueue with response always (1); FD from a rejected enqueue
 *              will be returned on a FQ (0).
 */
void
dpaa2_swp_set_ed_norp(struct dpaa2_eq_desc *ed, bool resp_always)
{
        /* Clear the ORP enable bit, then select the response mode. */
        ed->verb &= ~(1 << ENQ_CMD_ORP_ENABLE_SHIFT);
        ed->verb |= resp_always
            ? ENQ_CMD_RESPONSE_ALWAYS
            : ENQ_CMD_REJECTS_TO_FQ;
}

/**
 * @brief Set FQ of the enqueue descriptor.
 */
void
dpaa2_swp_set_ed_fq(struct dpaa2_eq_desc *ed, uint32_t fqid)
{
        /* Clear the target-type bit and store the FQ ID as the target. */
        ed->verb &= ~(1 << ENQ_CMD_TARGET_TYPE_SHIFT);
        ed->tgtid = fqid;
}

/**
 * @brief Enable interrupts for a software portal.
 */
void
dpaa2_swp_set_intr_trigger(struct dpaa2_swp *swp, uint32_t mask)
{
        if (swp == NULL)
                return;

        /* Write the interrupt enable mask into SWP_IER. */
        dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_IER, mask);
}

/**
 * @brief Return the value in the SWP_IER register.
 */
/**
 * @brief Return the value in the SWP_IER register, or 0 for a NULL portal.
 */
uint32_t
dpaa2_swp_get_intr_trigger(struct dpaa2_swp *swp)
{
        /* Parenthesized return for consistency with the rest of the file. */
        if (swp != NULL)
                return (dpaa2_swp_read_reg(swp, DPAA2_SWP_CINH_IER));
        return (0);
}

/**
 * @brief Return the value in the SWP_ISR register.
 */
/**
 * @brief Return the value in the SWP_ISR register, or 0 for a NULL portal.
 */
uint32_t
dpaa2_swp_read_intr_status(struct dpaa2_swp *swp)
{
        /* Parenthesized return for consistency with the rest of the file. */
        if (swp != NULL)
                return (dpaa2_swp_read_reg(swp, DPAA2_SWP_CINH_ISR));
        return (0);
}

/**
 * @brief Clear SWP_ISR register according to the given mask.
 */
void
dpaa2_swp_clear_intr_status(struct dpaa2_swp *swp, uint32_t mask)
{
        if (swp == NULL)
                return;

        /* Write-to-clear the selected bits of SWP_ISR. */
        dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_ISR, mask);
}

/**
 * @brief Enable or disable push dequeue.
 *
 * swp:         the software portal object
 * chan_idx:    the channel index (0 to 15)
 * en:          enable or disable push dequeue
 */
void
dpaa2_swp_set_push_dequeue(struct dpaa2_swp *swp, uint8_t chan_idx, bool en)
{
        uint16_t dqsrc;

        if (swp == NULL)
                return;
        if (chan_idx > 15u) {
                device_printf(swp->desc->dpio_dev, "channel index "
                    "should be <= 15: chan_idx=%d\n", chan_idx);
                return;
        }

        /* Update the per-channel source bit in the cached SDQCR value. */
        if (en)
                swp->sdq |= 1 << chan_idx;
        else
                swp->sdq &= ~(1 << chan_idx);

        /*
         * If no channel is enabled, SDQCR must be written as 0 or else QMan
         * will assert errors.
         */
        dqsrc = (swp->sdq >> DPAA2_SDQCR_SRC_SHIFT) &
            DPAA2_SDQCR_SRC_MASK;
        dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_SDQCR,
            dqsrc != 0 ? swp->sdq : 0);
}

/**
 * @brief Set new IRQ coalescing values.
 *
 * swp:         The software portal object.
 * threshold:   Threshold for DQRR interrupt generation. The DQRR interrupt
 *              asserts when the ring contains greater than "threshold" entries.
 * holdoff:     DQRR interrupt holdoff (timeout) period in us.
 */
int
dpaa2_swp_set_irq_coalescing(struct dpaa2_swp *swp, uint32_t threshold,
    uint32_t holdoff)
{
        uint32_t itp; /* Interrupt Timeout Period */

        if (swp == NULL)
                return (EINVAL);

        /*
         * Convert "holdoff" value from us to 256 QBMAN clock cycles
         * increments. This depends on the QBMAN internal frequency.
         */
        itp = (holdoff * 1000u) / swp->desc->swp_cycles_ratio;
        if (itp > DQRR_MAX_ITP)
                itp = DQRR_MAX_ITP;
        /* Threshold cannot reach the DQRR ring size. */
        if (threshold >= swp->dqrr.ring_size)
                threshold = swp->dqrr.ring_size - 1;

        swp->dqrr.irq_threshold = threshold;
        swp->dqrr.irq_itp = itp;

        dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_DQRR_ITR, threshold);
        dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_ITPR, itp);

        return (0);
}

/*
 * Software portal commands.
 */

/**
 * @brief Configure the channel data availability notification (CDAN)
 * in a particular WQ channel.
 */
int
dpaa2_swp_conf_wq_channel(struct dpaa2_swp *swp, uint16_t chan_id,
    uint8_t we_mask, bool cdan_en, uint64_t ctx)
{
        /* NOTE: 64 bytes command. */
        struct __packed {
                uint8_t         verb;
                uint8_t         result; /* in response only! */
                uint16_t        chan_id;
                uint8_t         we;
                uint8_t         ctrl;
                uint16_t        _reserved2;
                uint64_t        ctx;
                uint8_t         _reserved3[48];
        } cmd = {0};
        struct __packed {
                uint8_t         verb;
                uint8_t         result;
                uint16_t        chan_id;
                uint8_t         _reserved[60];
        } rsp;
        int error;

        if (swp == NULL)
                return (EINVAL);

        /* Prepare the WQ channel configuration command. */
        cmd.ctx = ctx;
        cmd.ctrl = cdan_en ? 1u : 0u;
        cmd.we = we_mask;
        cmd.chan_id = chan_id;

        error = dpaa2_swp_exec_mgmt_command(swp, (struct dpaa2_swp_cmd *) &cmd,
            (struct dpaa2_swp_rsp *) &rsp, CMDID_SWP_WQCHAN_CONFIGURE);
        if (error != 0)
                return (error);

        if (rsp.result == QBMAN_CMD_RC_OK)
                return (0);

        device_printf(swp->desc->dpio_dev, "WQ channel configuration "
            "error: channel_id=%d, result=0x%02x\n", chan_id,
            rsp.result);
        return (EIO);
}

/**
 * @brief Query current configuration/state of the buffer pool.
 */
int
dpaa2_swp_query_bp(struct dpaa2_swp *swp, uint16_t bpid,
    struct dpaa2_bp_conf *conf)
{
        /* NOTE: 64 bytes command. */
        struct __packed {
                uint8_t         verb;
                uint8_t         _reserved1;
                uint16_t        bpid;
                uint8_t         _reserved2[60];
        } cmd = {0};
        struct __packed {
                uint8_t         verb;
                uint8_t         result;
                uint32_t        _reserved1;
                uint8_t         bdi;
                uint8_t         state;
                uint32_t        fill;
                /* TODO: Support the other fields as well. */
                uint8_t         _reserved2[52];
        } rsp;
        int error;

        if (swp == NULL || conf == NULL)
                return (EINVAL);

        /* The query is fully described by the buffer pool ID. */
        cmd.bpid = bpid;

        error = dpaa2_swp_exec_mgmt_command(swp, (struct dpaa2_swp_cmd *) &cmd,
            (struct dpaa2_swp_rsp *) &rsp, CMDID_SWP_BP_QUERY);
        if (error != 0)
                return (error);

        if (rsp.result != QBMAN_CMD_RC_OK) {
                device_printf(swp->desc->dpio_dev, "BP query error: bpid=%d, "
                    "result=0x%02x\n", bpid, rsp.result);
                return (EIO);
        }

        /* Copy the fields of interest out of the response. */
        conf->bdi = rsp.bdi;
        conf->state = rsp.state;
        conf->free_bufn = rsp.fill;

        return (0);
}

int
dpaa2_swp_release_bufs(struct dpaa2_swp *swp, uint16_t bpid, bus_addr_t *buf,
    uint32_t buf_num)
{
        /* NOTE: 64 bytes command. */
        struct __packed {
                uint8_t         verb;
                uint8_t         _reserved1;
                uint16_t        bpid;
                uint32_t        _reserved2;
                uint64_t        buf[DPAA2_SWP_BUFS_PER_CMD];
        } cmd = {0};
        uint32_t i;
        int error;

        if (swp == NULL || buf == NULL || buf_num == 0u ||
            buf_num > DPAA2_SWP_BUFS_PER_CMD)
                return (EINVAL);

        /* Copy buffer addresses and target pool into the command. */
        cmd.bpid = bpid;
        for (i = 0; i < buf_num; i++)
                cmd.buf[i] = buf[i];
        cmd.verb |= 1 << 5; /* Switch release buffer command to valid. */

        error = dpaa2_swp_exec_br_command(swp, (struct dpaa2_swp_cmd *) &cmd,
            buf_num);
        if (error != 0) {
                device_printf(swp->desc->dpio_dev, "buffers release command "
                    "failed\n");
                return (error);
        }

        return (0);
}

/**
 * @brief Fetch the next entry from the Dequeue Response Ring, if available.
 *
 * swp: Software portal (caller is expected to hold the portal lock).
 * dq:  Storage for the dequeue response message.
 * idx: If not NULL, receives the DQRR index of the returned entry.
 *
 * Returns 0 on success, ENOENT if no new entry is available, or EINVAL.
 */
int
dpaa2_swp_dqrr_next_locked(struct dpaa2_swp *swp, struct dpaa2_dq *dq,
    uint32_t *idx)
{
        struct resource_map *map;
        struct dpaa2_swp_rsp *rsp;
        uint32_t verb, pi; /* producer index */
        uint32_t offset;

        /* Validate the arguments BEFORE dereferencing them. */
        if (swp == NULL || dq == NULL)
                return (EINVAL);

        map = swp->cinh_map;
        rsp = (struct dpaa2_swp_rsp *) dq;
        offset = swp->cfg.mem_backed
            ? DPAA2_SWP_CENA_DQRR_MEM(swp->dqrr.next_idx)
            : DPAA2_SWP_CENA_DQRR(swp->dqrr.next_idx);

        /*
         * Before using valid-bit to detect if something is there, we have to
         * handle the case of the DQRR reset bug...
         */
        if (swp->dqrr.reset_bug) {
                /*
                 * We pick up new entries by cache-inhibited producer index,
                 * which means that a non-coherent mapping would require us to
                 * invalidate and read *only* once that PI has indicated that
                 * there's an entry here. The first trip around the DQRR ring
                 * will be much less efficient than all subsequent trips around
                 * it...
                 */
                pi = dpaa2_swp_read_reg(swp, DPAA2_SWP_CINH_DQPI) & DQRR_PI_MASK;

                /* There are new entries if pi != next_idx */
                if (pi == swp->dqrr.next_idx)
                        return (ENOENT);

                /*
                 * If next_idx is/was the last ring index, and 'pi' is
                 * different, we can disable the workaround as all the ring
                 * entries have now been DMA'd to so valid-bit checking is
                 * repaired.
                 *
                 * NOTE: This logic needs to be based on next_idx (which
                 *       increments one at a time), rather than on pi (which
                 *       can burst and wrap-around between our snapshots of it).
                 */
                if (swp->dqrr.next_idx == (swp->dqrr.ring_size - 1))
                        swp->dqrr.reset_bug = 0;
        }

        verb = bus_read_4(map, offset);
        if ((verb & DPAA2_SWP_VALID_BIT) != swp->dqrr.valid_bit)
                return (ENOENT);

        /* Read dequeue response message. */
        for (int i = 0; i < DPAA2_SWP_RSP_PARAMS_N; i++)
                rsp->params[i] = bus_read_8(map, offset + i * sizeof(uint64_t));

        /* Return index of the current entry (if requested). */
        if (idx != NULL)
                *idx = swp->dqrr.next_idx;

        /*
         * There's something there. Move "next_idx" attention to the next ring
         * entry before returning what we found.
         */
        swp->dqrr.next_idx++;
        swp->dqrr.next_idx &= swp->dqrr.ring_size - 1; /* wrap around */
        if (swp->dqrr.next_idx == 0u)
                swp->dqrr.valid_bit ^= DPAA2_SWP_VALID_BIT;

        return (0);
}

/**
 * @brief Issue a Volatile Dequeue Command (VDC) and wait for the response.
 *
 * swp:      Software portal to issue the command to.
 * chan_id:  Channel to dequeue from.
 * buf:      DMA buffer that receives the dequeue response (its paddr is
 *           passed to QBMan as the response address).
 * frames_n: Number of frames to pull (1..16).
 *
 * Returns 0 on success, ENOENT if the portal is destroyed, ETIMEDOUT if the
 * response did not arrive in time, EINVAL on bad arguments, or an error from
 * the command execution.
 */
int
dpaa2_swp_pull(struct dpaa2_swp *swp, uint16_t chan_id, struct dpaa2_buf *buf,
    uint32_t frames_n)
{
        /* NOTE: 64 bytes command. */
        struct __packed {
                uint8_t         verb;
                uint8_t         numf;
                uint8_t         tok;
                uint8_t         _reserved;
                uint32_t        dq_src;
                uint64_t        rsp_addr;
                uint64_t        _reserved1[6];
        } cmd = {0};
        struct dpaa2_dq *msg;
        uint16_t flags;
        int i, error;

        KASSERT(frames_n != 0u, ("%s: cannot pull zero frames", __func__));
        KASSERT(frames_n <= 16u, ("%s: too much frames to pull", __func__));

        /* Validate pointers before dereferencing, as the siblings do. */
        if (swp == NULL || buf == NULL)
                return (EINVAL);

        cmd.numf = frames_n - 1;
        cmd.tok = DPAA2_SWP_VDQCR_TOKEN;
        cmd.dq_src = chan_id;
        cmd.rsp_addr = (uint64_t)buf->paddr;

        /* Dequeue command type */
        cmd.verb &= ~(1 << QB_VDQCR_VERB_DCT0_SHIFT);
        cmd.verb |=  (1 << QB_VDQCR_VERB_DCT1_SHIFT);
        /* Dequeue from a specific software portal channel (ID's in DQ_SRC). */
        cmd.verb &= ~(1 << QB_VDQCR_VERB_DT0_SHIFT);
        cmd.verb &= ~(1 << QB_VDQCR_VERB_DT1_SHIFT);
        /* Write the response to this command into memory (at the RSP_ADDR). */
        cmd.verb |=  (1 << QB_VDQCR_VERB_RLS_SHIFT);
        /* Response writes won't attempt to allocate into a cache. */
        cmd.verb &= ~(1 << QB_VDQCR_VERB_WAE_SHIFT);
        /* Allow the FQ to remain active in the portal after dequeue. */
        cmd.verb &= ~(1 << QB_VDQCR_VERB_RAD_SHIFT);

        DPAA2_SWP_LOCK(swp, &flags);
        if (flags & DPAA2_SWP_DESTROYED) {
                /* Terminate operation if portal is destroyed. */
                DPAA2_SWP_UNLOCK(swp);
                return (ENOENT);
        }

        error = dpaa2_swp_exec_vdc_command_locked(swp,
            (struct dpaa2_swp_cmd *) &cmd);
        if (error != 0) {
                DPAA2_SWP_UNLOCK(swp);
                return (error);
        }

        /* Let's sync before reading VDQ response from QBMan. */
        bus_dmamap_sync(buf->dmat, buf->dmap, BUS_DMASYNC_POSTREAD);

        /* Read VDQ response from QBMan. */
        msg = (struct dpaa2_dq *)buf->vaddr;
        for (i = 1; i <= CMD_SPIN_ATTEMPTS; i++) {
                if ((msg->fdr.desc.stat & DPAA2_DQ_STAT_VOLATILE) &&
                    (msg->fdr.desc.tok == DPAA2_SWP_VDQCR_TOKEN)) {
                        /* Reset token. */
                        msg->fdr.desc.tok = 0;
                        break;
                }
                DELAY(CMD_SPIN_TIMEOUT);
        }
        DPAA2_SWP_UNLOCK(swp);

        /* Return an error on expired timeout. */
        return (i > CMD_SPIN_ATTEMPTS ? ETIMEDOUT : 0);
}

/**
 * @brief Issue a command to enqueue a frame using one enqueue descriptor.
 *
 * swp:         Software portal used to send this command to.
 * ed:          Enqueue command descriptor.
 * fd:          Frame descriptor to enqueue.
 */
int
dpaa2_swp_enq(struct dpaa2_swp *swp, struct dpaa2_eq_desc *ed,
    struct dpaa2_fd *fd)
{
        uint32_t flags = 0;
        int rc = dpaa2_swp_enq_mult(swp, ed, fd, &flags, 1);

        /*
         * dpaa2_swp_enq_mult() returns the number of frames enqueued (zero
         * when the EQCR is full) or a positive errno value on failure, so
         * only a result of exactly one frame means success here; "rc >= 0"
         * would misreport a full ring and errors as success.
         */
        return (rc == 1 ? 0 : EBUSY);
}

/**
 * @brief Issue a command to enqueue frames using one enqueue descriptor.
 *
 * swp:         Software portal used to send this command to.
 * ed:          Enqueue command descriptor.
 * fd:          Frame descriptor to enqueue.
 * flags:       Table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL.
 * frames_n:    Number of FDs to enqueue.
 *
 * NOTE: Enqueue command (64 bytes): 32 (eq. descriptor) + 32 (frame descriptor).
 */
int
dpaa2_swp_enq_mult(struct dpaa2_swp *swp, struct dpaa2_eq_desc *ed,
    struct dpaa2_fd *fd, uint32_t *flags, int frames_n)
{
        /* Byte/word/dword views of the descriptors for partial writes. */
        const uint8_t  *ed_pdat8 =  (const uint8_t *) ed;
        const uint32_t *ed_pdat32 = (const uint32_t *) ed;
        const uint64_t *ed_pdat64 = (const uint64_t *) ed;
        const uint64_t *fd_pdat64 = (const uint64_t *) fd;
        struct resource_map *map;
        uint32_t eqcr_ci, eqcr_pi; /* EQCR consumer/producer index */
        uint32_t half_mask, full_mask, val, ci_offset;
        uint16_t swp_flags;
        int num_enq = 0;

        if (swp == NULL || ed == NULL || fd == NULL || flags == NULL ||
            frames_n == 0)
                return (EINVAL);

        DPAA2_SWP_LOCK(swp, &swp_flags);
        if (swp_flags & DPAA2_SWP_DESTROYED) {
                /* Terminate operation if portal is destroyed. */
                DPAA2_SWP_UNLOCK(swp);
                return (ENOENT);
        }

        map = swp->cfg.writes_cinh ? swp->cinh_map : swp->cena_map;
        ci_offset = swp->cfg.mem_backed
            ? DPAA2_SWP_CENA_EQCR_CI_MEMBACK
            : DPAA2_SWP_CENA_EQCR_CI;

        /* half_mask covers the ring index; full_mask adds the wrap bit. */
        half_mask = swp->eqcr.pi_ci_mask >> 1;
        full_mask = swp->eqcr.pi_ci_mask;

        if (swp->eqcr.available == 0) {
                /* Re-read the consumer index to see how far HW has advanced. */
                val = dpaa2_swp_read_reg(swp, ci_offset);
                eqcr_ci = swp->eqcr.ci;
                swp->eqcr.ci = val & full_mask;

                swp->eqcr.available = dpaa2_swp_cyc_diff(swp->eqcr.pi_ring_size,
                    eqcr_ci, swp->eqcr.ci);

                /* Still no room: report zero frames enqueued. */
                if (swp->eqcr.available == 0) {
                        DPAA2_SWP_UNLOCK(swp);
                        return (0);
                }
        }

        eqcr_pi = swp->eqcr.pi;
        num_enq = swp->eqcr.available < frames_n
            ? swp->eqcr.available : frames_n;
        swp->eqcr.available -= num_enq;

        KASSERT(num_enq >= 0 && num_enq <= swp->eqcr.pi_ring_size,
            ("%s: unexpected num_enq=%d", __func__, num_enq));
        KASSERT(swp->eqcr.available >= 0 &&
            swp->eqcr.available <= swp->eqcr.pi_ring_size,
            ("%s: unexpected eqcr.available=%d", __func__, swp->eqcr.available));

        /* Fill in the EQCR ring. */
        for (int i = 0; i < num_enq; i++) {
                /* Write enq. desc. without the VERB, DCA, SEQNUM and OPRID. */
                for (int j = 1; j <= 3; j++)
                        bus_write_8(map,
                            DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask) +
                            sizeof(uint64_t) * j, ed_pdat64[j]);
                /* Write OPRID. */
                bus_write_4(map,
                    DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask) + sizeof(uint32_t),
                    ed_pdat32[1]);
                /* Write DCA and SEQNUM without VERB byte. */
                for (int j = 1; j <= 3; j++)
                        bus_write_1(map,
                            DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask) +
                            sizeof(uint8_t) * j, ed_pdat8[j]);

                /* Write frame descriptor. */
                for (int j = 0; j <= 3; j++)
                        bus_write_8(map,
                            DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask) +
                            ENQ_DESC_FD_OFFSET +
                            sizeof(uint64_t) * j, fd_pdat64[j]);
                eqcr_pi++;
        }

        /* Ensure the command bodies land before the VERB bytes below. */
        wmb();

        /* Write the VERB byte of enqueue descriptor. */
        eqcr_pi = swp->eqcr.pi;
        for (int i = 0; i < num_enq; i++) {
                bus_write_1(map,
                    DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask),
                    ed_pdat8[0] | swp->eqcr.pi_vb);

                if (flags && (flags[i] & ENQ_FLAG_DCA)) {
                        /* Update DCA byte. */
                        bus_write_1(map,
                            DPAA2_SWP_CENA_EQCR(eqcr_pi & half_mask) + 1,
                            (1 << ENQ_CMD_DCA_EN_SHIFT) |
                            (flags[i] & ENQ_DCA_IDXMASK));
                }
                eqcr_pi++;
                /* Toggle the valid bit on every wrap of the half ring. */
                if (!(eqcr_pi & half_mask))
                        swp->eqcr.pi_vb ^= DPAA2_SWP_VALID_BIT;
        }
        swp->eqcr.pi = eqcr_pi & full_mask;

        DPAA2_SWP_UNLOCK(swp);

        return (num_enq);
}

/*
 * Number of ring entries from 'first' (inclusive) up to 'last' (exclusive).
 * The counters run modulo twice the ring size, so a smaller 'last' means
 * the interval wrapped around.
 */
static int
dpaa2_swp_cyc_diff(uint8_t ringsize, uint8_t first, uint8_t last)
{
        if (first <= last) {
                return (last - first);
        }

        /* Wrapped around: counters are modulo 2 * ringsize. */
        return ((2 * ringsize) - (first - last));
}

/**
 * @brief Execute Buffer Release Command (BRC).
 *
 * Copies the prepared command into the next free Release Command Register
 * (RCR) entry of the portal and hands it to QBMan by writing the VERB byte
 * last (it carries the entry's valid bit).
 *
 * @param swp     Software portal to release buffers through.
 * @param cmd     Prepared release command (64 bytes; the lowest byte of
 *                params[0] is the VERB byte).
 * @param buf_num Number-of-buffers field OR'ed into the VERB byte.
 *
 * @return 0 on success; EINVAL on NULL arguments; ENOENT if the portal has
 *         been destroyed; EBUSY if no free RCR entry could be allocated.
 */
static int
dpaa2_swp_exec_br_command(struct dpaa2_swp *swp, struct dpaa2_swp_cmd *cmd,
    uint32_t buf_num)
{
        /* Overlay to access the VERB (first) byte of the command. */
        struct __packed with_verb {
                uint8_t verb;
                uint8_t _reserved[63];
        } *c;
        const uint8_t *cmd_pdat8 = (const uint8_t *) cmd->params;
        const uint32_t *cmd_pdat32 = (const uint32_t *) cmd->params;
        struct resource_map *map;
        uint32_t offset, rar; /* Release Array Allocation register */
        uint16_t flags;

        if (!swp || !cmd)
                return (EINVAL);

        DPAA2_SWP_LOCK(swp, &flags);
        if (flags & DPAA2_SWP_DESTROYED) {
                /* Terminate operation if portal is destroyed. */
                DPAA2_SWP_UNLOCK(swp);
                return (ENOENT);
        }

        /* Ask QBMan for a free RCR entry; fail if the ring is full. */
        rar = dpaa2_swp_read_reg(swp, DPAA2_SWP_CINH_RAR);
        if (!RAR_SUCCESS(rar)) {
                DPAA2_SWP_UNLOCK(swp);
                return (EBUSY);
        }

        map = swp->cfg.writes_cinh ? swp->cinh_map : swp->cena_map;
        offset = swp->cfg.mem_backed
            ? DPAA2_SWP_CENA_RCR_MEM(RAR_IDX(rar))
            : DPAA2_SWP_CENA_RCR(RAR_IDX(rar));
        c = (struct with_verb *) cmd;

        /*
         * Write command bytes (without VERB byte): 64-bit words 1..7 first,
         * then the remaining bytes of word 0 above the VERB byte.
         */
        for (uint32_t i = 1; i < DPAA2_SWP_CMD_PARAMS_N; i++)
                bus_write_8(map, offset + sizeof(uint64_t) * i, cmd->params[i]);
        bus_write_4(map, offset + 4, cmd_pdat32[1]);
        for (uint32_t i = 1; i <= 3; i++)
                bus_write_1(map, offset + i, cmd_pdat8[i]);

        /*
         * Write VERB byte and trigger command execution. The VERB carries the
         * valid bit and is written last so QBMan only consumes a complete
         * entry; the barrier placement differs between the two portal modes.
         */
        if (swp->cfg.mem_backed) {
                bus_write_1(map, offset, c->verb | RAR_VB(rar) | buf_num);
                wmb();
                dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_RCR_AM_RT +
                    RAR_IDX(rar) * 4, DPAA2_SWP_RT_MODE);
        } else {
                wmb();
                bus_write_1(map, offset, c->verb | RAR_VB(rar) | buf_num);
        }

        DPAA2_SWP_UNLOCK(swp);

        return (0);
}

/**
 * @brief Execute Volatile Dequeue Command (VDC).
 *
 * This command will be executed by QBMan only once in order to deliver requested
 * number of frames (1-16 or 1-32 depending on QBMan version) to the driver via
 * DQRR or arbitrary DMA-mapped memory.
 *
 * NOTE: There is a counterpart to the volatile dequeue command called static
 *       dequeue command (SDQC) which is executed periodically all the time the
 *       command is present in the SDQCR register.
 *
 * NOTE: No locking or portal-destroyed check is performed here — per the
 *       "_locked" suffix, the caller is expected to hold the portal lock.
 *
 * @param swp Software portal to execute the command on.
 * @param cmd Prepared volatile dequeue command (64 bytes; the lowest byte
 *            of params[0] is the VERB byte).
 *
 * @return Always 0.
 */
static int
dpaa2_swp_exec_vdc_command_locked(struct dpaa2_swp *swp,
    struct dpaa2_swp_cmd *cmd)
{
        /* Overlay to access the VERB (first) byte of the command. */
        struct __packed with_verb {
                uint8_t verb;
                uint8_t _reserved[63];
        } *c;
        const uint8_t *p8 = (const uint8_t *) cmd->params;
        const uint32_t *p32 = (const uint32_t *) cmd->params;
        struct resource_map *map;
        uint32_t offset;

        map = swp->cfg.writes_cinh ? swp->cinh_map : swp->cena_map;
        offset = swp->cfg.mem_backed
            ? DPAA2_SWP_CENA_VDQCR_MEM : DPAA2_SWP_CENA_VDQCR;
        c = (struct with_verb *) cmd;

        /*
         * Write command bytes (without VERB byte): 64-bit words 1..7 first,
         * then the remaining bytes of word 0 above the VERB byte.
         */
        for (uint32_t i = 1; i < DPAA2_SWP_CMD_PARAMS_N; i++)
                bus_write_8(map, offset + sizeof(uint64_t) * i, cmd->params[i]);
        bus_write_4(map, offset + 4, p32[1]);
        for (uint32_t i = 1; i <= 3; i++)
                bus_write_1(map, offset + i, p8[i]);

        /*
         * Write VERB byte and trigger command execution. The VERB carries the
         * valid bit and is written last so QBMan only consumes a complete
         * command; the valid bit is then toggled for the next VDC.
         */
        if (swp->cfg.mem_backed) {
                bus_write_1(map, offset, c->verb | swp->vdq.valid_bit);
                swp->vdq.valid_bit ^= DPAA2_SWP_VALID_BIT;
                wmb();
                dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_VDQCR_RT,
                    DPAA2_SWP_RT_MODE);
        } else {
                wmb();
                bus_write_1(map, offset, c->verb | swp->vdq.valid_bit);
                swp->vdq.valid_bit ^= DPAA2_SWP_VALID_BIT;
        }

        return (0);
}

/**
 * @brief Execute a QBMan management command.
 *
 * Sends the command via the Management Command register and busy-waits for
 * the reply from the Management Response register(s), all under the portal
 * lock.
 *
 * @param swp   Software portal to execute the command on.
 * @param cmd   Prepared management command (64 bytes).
 * @param rsp   Buffer the 64-byte response is copied into.
 * @param cmdid Command ID placed into the VERB byte of the command.
 *
 * @return 0 on success; EINVAL on NULL arguments; ENOENT if the portal has
 *         been destroyed; otherwise the error (e.g. ETIMEDOUT) from waiting
 *         for the response.
 */
static int
dpaa2_swp_exec_mgmt_command(struct dpaa2_swp *swp, struct dpaa2_swp_cmd *cmd,
    struct dpaa2_swp_rsp *rsp, uint8_t cmdid)
{
#if (defined(_KERNEL) && defined(INVARIANTS))
        /* Overlay to access the VERB (first) byte of the response. */
        struct __packed with_verb {
                uint8_t verb;
                uint8_t _reserved[63];
        } *r;
#endif
        uint16_t flags;
        int error;

        if (swp == NULL || cmd == NULL || rsp == NULL)
                return (EINVAL);

        DPAA2_SWP_LOCK(swp, &flags);
        if (flags & DPAA2_SWP_DESTROYED) {
                /* Terminate operation if portal is destroyed. */
                DPAA2_SWP_UNLOCK(swp);
                return (ENOENT);
        }

        /*
         * Send a command to QBMan using Management Command register and wait
         * for response from the Management Response registers.
         */
        dpaa2_swp_send_mgmt_command(swp, cmd, cmdid);
        error = dpaa2_swp_wait_for_mgmt_response(swp, rsp);
        if (error) {
                DPAA2_SWP_UNLOCK(swp);
                return (error);
        }
        DPAA2_SWP_UNLOCK(swp);

#if (defined(_KERNEL) && defined(INVARIANTS))
        /* Sanity check: the response must echo the command ID we sent. */
        r = (struct with_verb *) rsp;
        KASSERT((r->verb & CMD_VERB_MASK) == cmdid,
            ("wrong VERB byte in response: resp=0x%02x, expected=0x%02x",
            r->verb, cmdid));
#endif

        return (0);
}

/*
 * Copy a management command into the Command Register (CR) and trigger its
 * execution by writing the VERB byte (cmdid | valid bit) last, so that QBMan
 * only consumes a complete command.
 *
 * NOTE(review): the mem-backed path uses swp->mr.valid_bit while the legacy
 * path uses swp->mc.valid_bit — this mirrors the per-mode toggling in
 * dpaa2_swp_wait_for_mgmt_response(), but confirm the split is intentional.
 *
 * Caller is expected to hold the portal lock (see
 * dpaa2_swp_exec_mgmt_command()). Always returns 0.
 */
static int
dpaa2_swp_send_mgmt_command(struct dpaa2_swp *swp, struct dpaa2_swp_cmd *cmd,
    uint8_t cmdid)
{
        const uint8_t *cmd_pdat8 = (const uint8_t *) cmd->params;
        const uint32_t *cmd_pdat32 = (const uint32_t *) cmd->params;
        struct resource_map *map;
        uint32_t offset;

        map = swp->cfg.writes_cinh ? swp->cinh_map : swp->cena_map;
        offset = swp->cfg.mem_backed ? DPAA2_SWP_CENA_CR_MEM : DPAA2_SWP_CENA_CR;

        /*
         * Write command bytes (without VERB byte): 64-bit words 1..7 first,
         * then the remaining bytes of word 0 above the VERB byte.
         */
        for (uint32_t i = 1; i < DPAA2_SWP_CMD_PARAMS_N; i++)
                bus_write_8(map, offset + sizeof(uint64_t) * i, cmd->params[i]);
        bus_write_4(map, offset + 4, cmd_pdat32[1]);
        for (uint32_t i = 1; i <= 3; i++)
                bus_write_1(map, offset + i, cmd_pdat8[i]);

        /* Write VERB byte and trigger command execution. */
        if (swp->cfg.mem_backed) {
                bus_write_1(map, offset, cmdid | swp->mr.valid_bit);
                wmb();
                dpaa2_swp_write_reg(swp, DPAA2_SWP_CINH_CR_RT,
                    DPAA2_SWP_RT_MODE);
        } else {
                wmb();
                bus_write_1(map, offset, cmdid | swp->mc.valid_bit);
        }

        return (0);
}

/*
 * Busy-wait for a management command response from QBMan: poll the
 * Management Response register up to CMD_SPIN_ATTEMPTS times (delaying
 * CMD_SPIN_TIMEOUT between polls) until its VERB byte shows a valid,
 * non-empty response, then copy the 64-byte response into 'rsp'.
 *
 * Returns 0 on success or ETIMEDOUT if no response arrived in time.
 */
static int
dpaa2_swp_wait_for_mgmt_response(struct dpaa2_swp *swp, struct dpaa2_swp_rsp *rsp)
{
        /* Mem-backed portals respond via CENA; legacy ones via CINH map. */
        struct resource_map *map = swp->cfg.mem_backed
            ? swp->cena_map : swp->cinh_map;
        /* Management command response to be read from the only RR or RR0/RR1. */
        const uint32_t offset = swp->cfg.mem_backed
            ? DPAA2_SWP_CENA_RR_MEM
            : DPAA2_SWP_CENA_RR(swp->mc.valid_bit);
        uint32_t i, verb, ret;
        int rc;

        /* Wait for a command response from QBMan. */
        for (i = 1; i <= CMD_SPIN_ATTEMPTS; i++) {
                if (swp->cfg.mem_backed) {
                        verb = (uint32_t) (bus_read_4(map, offset) & 0xFFu);
                        /* Keep waiting while the valid bit doesn't match... */
                        if (swp->mr.valid_bit != (verb & DPAA2_SWP_VALID_BIT))
                                goto wait;
                        /* ...or the VERB (minus the valid bit) is still 0. */
                        if (!(verb & ~DPAA2_SWP_VALID_BIT))
                                goto wait;
                        /* Got it; flip the valid bit for the next response. */
                        swp->mr.valid_bit ^= DPAA2_SWP_VALID_BIT;
                } else {
                        ret = bus_read_4(map, offset);
                        verb = ret & ~DPAA2_SWP_VALID_BIT; /* remove valid bit */
                        if (verb == 0u)
                                goto wait;
                        /* Got it; flip the valid bit for the next command. */
                        swp->mc.valid_bit ^= DPAA2_SWP_VALID_BIT;
                }
                break;
 wait:
                DELAY(CMD_SPIN_TIMEOUT);
        }
        /* Return an error on expired timeout. */
        rc = i > CMD_SPIN_ATTEMPTS ? ETIMEDOUT : 0;

        /*
         * Read command response. Note this happens even on timeout, so 'rsp'
         * then holds whatever the registers currently contain.
         */
        for (i = 0; i < DPAA2_SWP_RSP_PARAMS_N; i++)
                rsp->params[i] = bus_read_8(map, offset + i * sizeof(uint64_t));

        return (rc);
}