root/sys/dev/igc/igc_txrx.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 * Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "if_igc.h"

#include <net/rss_config.h>
#include <netinet/in_rss.h>

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int igc_isc_txd_encap(void *, if_pkt_info_t);
static void igc_isc_txd_flush(void *, uint16_t, qidx_t);
static int igc_isc_txd_credits_update(void *, uint16_t, bool);

static void igc_isc_rxd_refill(void *, if_rxd_update_t);

static void igc_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
static int igc_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);

static int igc_isc_rxd_pkt_get(void *, if_rxd_info_t);

static int igc_tx_ctx_setup(struct tx_ring *, if_pkt_info_t, uint32_t *,
    uint32_t *);
static int igc_tso_setup(struct tx_ring *, if_pkt_info_t, uint32_t *,
    uint32_t *);

static void igc_rx_checksum(uint32_t, if_rxd_info_t, uint32_t);
static int igc_determine_rsstype(uint16_t);

extern void igc_if_enable_intr(if_ctx_t);
extern int igc_intr(void *);

struct if_txrx igc_txrx = {
        .ift_txd_encap = igc_isc_txd_encap,
        .ift_txd_flush = igc_isc_txd_flush,
        .ift_txd_credits_update = igc_isc_txd_credits_update,
        .ift_rxd_available = igc_isc_rxd_available,
        .ift_rxd_pkt_get = igc_isc_rxd_pkt_get,
        .ift_rxd_refill = igc_isc_rxd_refill,
        .ift_rxd_flush = igc_isc_rxd_flush,
        .ift_legacy_intr = igc_intr
};

void
igc_dump_rs(struct igc_softc *sc)
{
        if_softc_ctx_t scctx = sc->shared;
        struct igc_tx_queue *que;
        struct tx_ring *txr;
        qidx_t i, ntxd, qid, cur;
        int16_t rs_cidx;
        uint8_t status;

        printf("\n");
        ntxd = scctx->isc_ntxd[0];
        for (qid = 0; qid < sc->tx_num_queues; qid++) {
                que = &sc->tx_queues[qid];
                txr =  &que->txr;
                rs_cidx = txr->tx_rs_cidx;
                if (rs_cidx != txr->tx_rs_pidx) {
                        cur = txr->tx_rsq[rs_cidx];
                        status = txr->tx_base[cur].upper.fields.status;
                        if (!(status & IGC_TXD_STAT_DD))
                                printf("qid[%d]->tx_rsq[%d]: %d clear ",
                                    qid, rs_cidx, cur);
                } else {
                        rs_cidx = (rs_cidx-1)&(ntxd-1);
                        cur = txr->tx_rsq[rs_cidx];
                        printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ",
                            qid, rs_cidx, cur);
                }
                printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed,
                    txr->tx_rs_pidx);
                for (i = 0; i < ntxd; i++) {
                        if (txr->tx_base[i].upper.fields.status &
                            IGC_TXD_STAT_DD)
                                printf("%d set ", i);
                }
                printf("\n");
        }
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
igc_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, uint32_t *cmd_type_len,
    uint32_t *olinfo_status)
{
        struct igc_adv_tx_context_desc *TXD;
        uint32_t type_tucmd_mlhl = 0, vlan_macip_lens = 0;
        uint32_t mss_l4len_idx = 0;
        uint32_t paylen;

        switch(pi->ipi_etype) {
        case ETHERTYPE_IPV6:
                type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
                break;
        case ETHERTYPE_IP:
                type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
                /* Tell transmit desc to also do IPv4 checksum. */
                *olinfo_status |= IGC_TXD_POPTS_IXSM << 8;
                break;
        default:
                panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
                    __func__, ntohs(pi->ipi_etype));
                break;
        }

        TXD = (struct igc_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];

        /* This is used in the transmit desc in encap */
        paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen -
            pi->ipi_tcp_hlen;

        /* VLAN MACLEN IPLEN */
        if (pi->ipi_mflags & M_VLANTAG) {
                vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT);
        }

        vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= pi->ipi_ip_hlen;
        TXD->vlan_macip_lens = htole32(vlan_macip_lens);

        /* ADV DTYPE TUCMD */
        type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
        type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
        TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

        /* MSS L4LEN IDX */
        mss_l4len_idx |= (pi->ipi_tso_segsz << IGC_ADVTXD_MSS_SHIFT);
        mss_l4len_idx |= (pi->ipi_tcp_hlen << IGC_ADVTXD_L4LEN_SHIFT);
        TXD->mss_l4len_idx = htole32(mss_l4len_idx);

        TXD->seqnum_seed = htole32(0);
        *cmd_type_len |= IGC_ADVTXD_DCMD_TSE;
        *olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
        *olinfo_status |= paylen << IGC_ADVTXD_PAYLEN_SHIFT;

        return (1);
}

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/
static int
igc_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
        struct igc_adv_tx_context_desc *TXD;
        uint32_t vlan_macip_lens, type_tucmd_mlhl;
        uint32_t mss_l4len_idx;
        mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;

        /* First check if TSO is to be used */
        if (pi->ipi_csum_flags & CSUM_TSO)
                return (igc_tso_setup(txr, pi, cmd_type_len, olinfo_status));

        /* Indicate the whole packet as payload when not doing TSO */
        *olinfo_status |= pi->ipi_len << IGC_ADVTXD_PAYLEN_SHIFT;

        /* Now ready a context descriptor */
        TXD = (struct igc_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx];

        /*
        ** In advanced descriptors the vlan tag must
        ** be placed into the context descriptor. Hence
        ** we need to make one even if not doing offloads.
        */
        if (pi->ipi_mflags & M_VLANTAG) {
                vlan_macip_lens |= (pi->ipi_vtag << IGC_ADVTXD_VLAN_SHIFT);
        } else if ((pi->ipi_csum_flags & IGC_CSUM_OFFLOAD) == 0) {
                return (0);
        }

        /* Set the ether header length */
        vlan_macip_lens |= pi->ipi_ehdrlen << IGC_ADVTXD_MACLEN_SHIFT;

        switch(pi->ipi_etype) {
        case ETHERTYPE_IP:
                type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
                break;
        case ETHERTYPE_IPV6:
                type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
                break;
        default:
                break;
        }

        vlan_macip_lens |= pi->ipi_ip_hlen;
        type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;

        switch (pi->ipi_ipproto) {
        case IPPROTO_TCP:
                if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
                        type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
                        *olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
                }
                break;
        case IPPROTO_UDP:
                if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
                        type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
                        *olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
                }
                break;
        case IPPROTO_SCTP:
                if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
                        type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP;
                        *olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
                }
                break;
        default:
                break;
        }

        /* Now copy bits into descriptor */
        TXD->vlan_macip_lens = htole32(vlan_macip_lens);
        TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
        TXD->seqnum_seed = htole32(0);
        TXD->mss_l4len_idx = htole32(mss_l4len_idx);

        return (1);
}

static int
igc_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
        struct igc_softc *sc = arg;
        if_softc_ctx_t scctx = sc->shared;
        struct igc_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
        struct tx_ring *txr = &que->txr;
        int nsegs = pi->ipi_nsegs;
        bus_dma_segment_t *segs = pi->ipi_segs;
        union igc_adv_tx_desc *txd = NULL;
        int i, j, pidx_last;
        uint32_t olinfo_status, cmd_type_len, txd_flags;
        qidx_t ntxd;

        pidx_last = olinfo_status = 0;
        /* Basic descriptor defines */
        cmd_type_len = (IGC_ADVTXD_DTYP_DATA |
            IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT);

        if (pi->ipi_mflags & M_VLANTAG)
                cmd_type_len |= IGC_ADVTXD_DCMD_VLE;

        i = pi->ipi_pidx;
        ntxd = scctx->isc_ntxd[0];
        txd_flags = pi->ipi_flags & IPI_TX_INTR ? IGC_ADVTXD_DCMD_RS : 0;
        /* Consume the first descriptor */
        i += igc_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status);
        if (i == scctx->isc_ntxd[0])
                i = 0;

        for (j = 0; j < nsegs; j++) {
                bus_size_t seglen;
                bus_addr_t segaddr;

                txd = (union igc_adv_tx_desc *)&txr->tx_base[i];
                seglen = segs[j].ds_len;
                segaddr = htole64(segs[j].ds_addr);

                txd->read.buffer_addr = segaddr;
                txd->read.cmd_type_len = htole32(IGC_ADVTXD_DCMD_IFCS |
                    cmd_type_len | seglen);
                txd->read.olinfo_status = htole32(olinfo_status);
                pidx_last = i;
                if (++i == scctx->isc_ntxd[0]) {
                        i = 0;
                }
        }
        if (txd_flags) {
                txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
                txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
                MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
        }

        txd->read.cmd_type_len |= htole32(IGC_ADVTXD_DCMD_EOP | txd_flags);
        pi->ipi_new_pidx = i;

        /* Sent data accounting for AIM */
        txr->tx_bytes += pi->ipi_len;
        ++txr->tx_packets;

        return (0);
}

static void
igc_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
        struct igc_softc *sc = arg;
        struct igc_tx_queue *que = &sc->tx_queues[txqid];
        struct tx_ring *txr = &que->txr;

        IGC_WRITE_REG(&sc->hw, IGC_TDT(txr->me), pidx);
}

static int
igc_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
        struct igc_softc *sc = arg;
        if_softc_ctx_t scctx = sc->shared;
        struct igc_tx_queue *que = &sc->tx_queues[txqid];
        struct tx_ring *txr = &que->txr;

        qidx_t processed = 0;
        int updated;
        qidx_t cur, prev, ntxd, rs_cidx;
        int32_t delta;
        uint8_t status;

        rs_cidx = txr->tx_rs_cidx;
        if (rs_cidx == txr->tx_rs_pidx)
                return (0);
        cur = txr->tx_rsq[rs_cidx];
        status = ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
        updated = !!(status & IGC_TXD_STAT_DD);

        if (!updated)
                return (0);

        /* If clear is false just let caller know that there
         * are descriptors to reclaim */
        if (!clear)
                return (1);

        prev = txr->tx_cidx_processed;
        ntxd = scctx->isc_ntxd[0];
        do {
                MPASS(prev != cur);
                delta = (int32_t)cur - (int32_t)prev;
                if (delta < 0)
                        delta += ntxd;
                MPASS(delta > 0);

                processed += delta;
                prev = cur;
                rs_cidx = (rs_cidx + 1) & (ntxd-1);
                if (rs_cidx == txr->tx_rs_pidx)
                        break;
                cur = txr->tx_rsq[rs_cidx];
                status =
                    ((union igc_adv_tx_desc *)&txr->tx_base[cur])->wb.status;
        } while ((status & IGC_TXD_STAT_DD));

        txr->tx_rs_cidx = rs_cidx;
        txr->tx_cidx_processed = prev;
        return (processed);
}

static void
igc_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
        struct igc_softc *sc = arg;
        if_softc_ctx_t scctx = sc->shared;
        uint16_t rxqid = iru->iru_qsidx;
        struct igc_rx_queue *que = &sc->rx_queues[rxqid];
        union igc_adv_rx_desc *rxd;
        struct rx_ring *rxr = &que->rxr;
        uint64_t *paddrs;
        uint32_t next_pidx, pidx;
        uint16_t count;
        int i;

        paddrs = iru->iru_paddrs;
        pidx = iru->iru_pidx;
        count = iru->iru_count;

        for (i = 0, next_pidx = pidx; i < count; i++) {
                rxd = (union igc_adv_rx_desc *)&rxr->rx_base[next_pidx];

                rxd->read.pkt_addr = htole64(paddrs[i]);
                if (++next_pidx == scctx->isc_nrxd[0])
                        next_pidx = 0;
        }
}

static void
igc_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
        struct igc_softc *sc = arg;
        struct igc_rx_queue *que = &sc->rx_queues[rxqid];
        struct rx_ring *rxr = &que->rxr;

        IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), pidx);
}

static int
igc_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
        struct igc_softc *sc = arg;
        if_softc_ctx_t scctx = sc->shared;
        struct igc_rx_queue *que = &sc->rx_queues[rxqid];
        struct rx_ring *rxr = &que->rxr;
        union igc_adv_rx_desc *rxd;
        uint32_t staterr = 0;
        int cnt, i;

        for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
                rxd = (union igc_adv_rx_desc *)&rxr->rx_base[i];
                staterr = le32toh(rxd->wb.upper.status_error);

                if ((staterr & IGC_RXD_STAT_DD) == 0)
                        break;
                if (++i == scctx->isc_nrxd[0])
                        i = 0;
                if (staterr & IGC_RXD_STAT_EOP)
                        cnt++;
        }
        return (cnt);
}

/****************************************************************
 * Routine sends data which has been dma'ed into host memory
 * to upper layer. Initialize ri structure.
 *
 * Returns 0 upon success, errno on failure
 ***************************************************************/

static int
igc_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
        struct igc_softc *sc = arg;
        if_softc_ctx_t scctx = sc->shared;
        struct igc_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
        struct rx_ring *rxr = &que->rxr;
        union igc_adv_rx_desc *rxd;

        uint16_t pkt_info, len;
        uint32_t ptype, staterr;
        int i, cidx;
        bool eop;

        staterr = i = 0;
        cidx = ri->iri_cidx;

        do {
                rxd = (union igc_adv_rx_desc *)&rxr->rx_base[cidx];
                staterr = le32toh(rxd->wb.upper.status_error);
                pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);

                MPASS ((staterr & IGC_RXD_STAT_DD) != 0);

                len = le16toh(rxd->wb.upper.length);
                ptype =
                    le32toh(rxd->wb.lower.lo_dword.data) & IGC_PKTTYPE_MASK;

                ri->iri_len += len;
                rxr->rx_bytes += ri->iri_len;

                rxd->wb.upper.status_error = 0;
                eop = ((staterr & IGC_RXD_STAT_EOP) == IGC_RXD_STAT_EOP);

                /* Make sure bad packets are discarded */
                if (eop && ((staterr & IGC_RXDEXT_STATERR_RXE) != 0)) {
                        sc->dropped_pkts++;
                        ++rxr->rx_discarded;
                        return (EBADMSG);
                }
                ri->iri_frags[i].irf_flid = 0;
                ri->iri_frags[i].irf_idx = cidx;
                ri->iri_frags[i].irf_len = len;

                if (++cidx == scctx->isc_nrxd[0])
                        cidx = 0;
#ifdef notyet
                if (rxr->hdr_split == true) {
                        ri->iri_frags[i].irf_flid = 1;
                        ri->iri_frags[i].irf_idx = cidx;
                        if (++cidx == scctx->isc_nrxd[0])
                                cidx = 0;
                }
#endif
                i++;
        } while (!eop);

        rxr->rx_packets++;

        if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
                igc_rx_checksum(staterr, ri, ptype);

        if (staterr & IGC_RXD_STAT_VP) {
                ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
                ri->iri_flags |= M_VLANTAG;
        }

        ri->iri_flowid =
                le32toh(rxd->wb.lower.hi_dword.rss);
        ri->iri_rsstype = igc_determine_rsstype(pkt_info);
        ri->iri_nfrags = i;

        return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
igc_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype)
{
        uint16_t status = (uint16_t)staterr;
        uint8_t errors = (uint8_t)(staterr >> 24);

        if (__predict_false(status & IGC_RXD_STAT_IXSM))
                return;

        /* If there is a layer 3 or 4 error we are done */
        if (__predict_false(errors & (IGC_RXD_ERR_IPE | IGC_RXD_ERR_TCPE)))
                return;

        /* IP Checksum Good */
        if (status & IGC_RXD_STAT_IPCS)
                ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

        /* Valid L4E checksum */
        if (__predict_true(status &
            (IGC_RXD_STAT_TCPCS | IGC_RXD_STAT_UDPCS))) {
                /* SCTP header present */
                if (__predict_false((ptype & IGC_RXDADV_PKTTYPE_ETQF) == 0 &&
                    (ptype & IGC_RXDADV_PKTTYPE_SCTP) != 0)) {
                        ri->iri_csum_flags |= CSUM_SCTP_VALID;
                } else {
                        ri->iri_csum_flags |=
                            CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
                        ri->iri_csum_data = htons(0xffff);
                }
        }
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
igc_determine_rsstype(uint16_t pkt_info)
{
        switch (pkt_info & IGC_RXDADV_RSSTYPE_MASK) {
        case IGC_RXDADV_RSSTYPE_IPV4_TCP:
                return M_HASHTYPE_RSS_TCP_IPV4;
        case IGC_RXDADV_RSSTYPE_IPV4:
                return M_HASHTYPE_RSS_IPV4;
        case IGC_RXDADV_RSSTYPE_IPV6_TCP:
                return M_HASHTYPE_RSS_TCP_IPV6;
        case IGC_RXDADV_RSSTYPE_IPV6_EX:
                return M_HASHTYPE_RSS_IPV6_EX;
        case IGC_RXDADV_RSSTYPE_IPV6:
                return M_HASHTYPE_RSS_IPV6;
        case IGC_RXDADV_RSSTYPE_IPV6_TCP_EX:
                return M_HASHTYPE_RSS_TCP_IPV6_EX;
        default:
                return M_HASHTYPE_NONE;
        }
}