// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2018 Quantenna Communications */

#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/crc32.h>
#include <linux/spinlock.h>
#include <linux/circ_buf.h>
#include <linux/log2.h>

#include "pcie_priv.h"
#include "pearl_pcie_regs.h"
#include "pearl_pcie_ipc.h"
#include "qtn_hw_ids.h"
#include "core.h"
#include "bus.h"
#include "shm_ipc.h"
#include "debug.h"

#define PEARL_TX_BD_SIZE_DEFAULT        32
#define PEARL_RX_BD_SIZE_DEFAULT        256

struct qtnf_pearl_bda {
        __le16 bda_len;
        __le16 bda_version;
        __le32 bda_pci_endian;
        __le32 bda_ep_state;
        __le32 bda_rc_state;
        __le32 bda_dma_mask;
        __le32 bda_msi_addr;
        __le32 bda_flashsz;
        u8 bda_boardname[PCIE_BDA_NAMELEN];
        __le32 bda_rc_msi_enabled;
        u8 bda_hhbm_list[PCIE_HHBM_MAX_SIZE];
        __le32 bda_dsbw_start_index;
        __le32 bda_dsbw_end_index;
        __le32 bda_dsbw_total_bytes;
        __le32 bda_rc_tx_bd_base;
        __le32 bda_rc_tx_bd_num;
        u8 bda_pcie_mac[QTN_ENET_ADDR_LENGTH];
        struct qtnf_shm_ipc_region bda_shm_reg1 __aligned(4096); /* host TX */
        struct qtnf_shm_ipc_region bda_shm_reg2 __aligned(4096); /* host RX */
} __packed;

struct qtnf_pearl_tx_bd {
        __le32 addr;
        __le32 addr_h;
        __le32 info;
        __le32 info_h;
} __packed;

struct qtnf_pearl_rx_bd {
        __le32 addr;
        __le32 addr_h;
        __le32 info;
        __le32 info_h;
        __le32 next_ptr;
        __le32 next_ptr_h;
} __packed;

struct qtnf_pearl_fw_hdr {
        u8 boardflg[8];
        __le32 fwsize;
        __le32 seqnum;
        __le32 type;
        __le32 pktlen;
        __le32 crc;
} __packed;

struct qtnf_pcie_pearl_state {
        struct qtnf_pcie_bus_priv base;

        /* lock for irq configuration changes */
        spinlock_t irq_lock;

        struct qtnf_pearl_bda __iomem *bda;
        void __iomem *pcie_reg_base;

        struct qtnf_pearl_tx_bd *tx_bd_vbase;
        dma_addr_t tx_bd_pbase;

        struct qtnf_pearl_rx_bd *rx_bd_vbase;
        dma_addr_t rx_bd_pbase;

        dma_addr_t bd_table_paddr;
        void *bd_table_vaddr;
        u32 bd_table_len;
        u32 pcie_irq_mask;
        u32 pcie_irq_rx_count;
        u32 pcie_irq_tx_count;
        u32 pcie_irq_uf_count;
};

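/* helpers to update the cached HDP interrupt mask under irq_lock and
 * mirror it into the PCIE_HDP_INT_EN register
 */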
static inline void qtnf_init_hdp_irqs(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        ps->pcie_irq_mask = (PCIE_HDP_INT_RX_BITS | PCIE_HDP_INT_TX_BITS);
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_enable_hdp_irqs(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        writel(ps->pcie_irq_mask, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_disable_hdp_irqs(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        writel(0x0, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_en_rxdone_irq(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        ps->pcie_irq_mask |= PCIE_HDP_INT_RX_BITS;
        writel(ps->pcie_irq_mask, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_dis_rxdone_irq(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        ps->pcie_irq_mask &= ~PCIE_HDP_INT_RX_BITS;
        writel(ps->pcie_irq_mask, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_en_txdone_irq(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        ps->pcie_irq_mask |= PCIE_HDP_INT_TX_BITS;
        writel(ps->pcie_irq_mask, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

static inline void qtnf_dis_txdone_irq(struct qtnf_pcie_pearl_state *ps)
{
        unsigned long flags;

        spin_lock_irqsave(&ps->irq_lock, flags);
        ps->pcie_irq_mask &= ~PCIE_HDP_INT_TX_BITS;
        writel(ps->pcie_irq_mask, PCIE_HDP_INT_EN(ps->pcie_reg_base));
        spin_unlock_irqrestore(&ps->irq_lock, flags);
}

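/* deassert the legacy INTx interrupt line towards the host */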
static void qtnf_deassert_intx(struct qtnf_pcie_pearl_state *ps)
{
        void __iomem *reg = ps->base.sysctl_bar + PEARL_PCIE_CFG0_OFFSET;
        u32 cfg;

        cfg = readl(reg);
        cfg &= ~PEARL_ASSERT_INTX;
        qtnf_non_posted_write(cfg, reg);
}

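/* reset the endpoint CPU via the LHOST IPC doorbell, then restore the
 * PCI config space clobbered by the reset
 */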
static void qtnf_pearl_reset_ep(struct qtnf_pcie_pearl_state *ps)
{
        const u32 data = QTN_PEARL_IPC_IRQ_WORD(QTN_PEARL_LHOST_EP_RESET);
        void __iomem *reg = ps->base.sysctl_bar +
                            QTN_PEARL_SYSCTL_LHOST_IRQ_OFFSET;

        qtnf_non_posted_write(data, reg);
        msleep(QTN_EP_RESET_WAIT_MS);
        pci_restore_state(ps->base.pdev);
}

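/* ring the LHOST IPC doorbell to interrupt the endpoint CPU */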
static void qtnf_pcie_pearl_ipc_gen_ep_int(void *arg)
{
        const struct qtnf_pcie_pearl_state *ps = arg;
        const u32 data = QTN_PEARL_IPC_IRQ_WORD(QTN_PEARL_LHOST_IPC_IRQ);
        void __iomem *reg = ps->base.sysctl_bar +
                            QTN_PEARL_SYSCTL_LHOST_IRQ_OFFSET;

        qtnf_non_posted_write(data, reg);
}

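/* accessors for the host/EP handshake state words kept in the boot
 * data area (BDA)
 */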
static int qtnf_is_state(__le32 __iomem *reg, u32 state)
{
        u32 s = readl(reg);

        return s & state;
}

static void qtnf_set_state(__le32 __iomem *reg, u32 state)
{
        u32 s = readl(reg);

        qtnf_non_posted_write(state | s, reg);
}

static void qtnf_clear_state(__le32 __iomem *reg, u32 state)
{
        u32 s = readl(reg);

        qtnf_non_posted_write(s & ~state, reg);
}

static int qtnf_poll_state(__le32 __iomem *reg, u32 state, u32 delay_in_ms)
{
        u32 timeout = 0;

        while (qtnf_is_state(reg, state) == 0) {
                usleep_range(1000, 1200);
                if (++timeout > delay_in_ms)
                        return -1;
        }

        return 0;
}

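/* allocate a single coherent DMA area holding both TX and RX
 * descriptor rings, and program the RX ring base and size into the HDP
 */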
static int pearl_alloc_bd_table(struct qtnf_pcie_pearl_state *ps)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        dma_addr_t paddr;
        void *vaddr;
        int len;

        len = priv->tx_bd_num * sizeof(struct qtnf_pearl_tx_bd) +
                priv->rx_bd_num * sizeof(struct qtnf_pearl_rx_bd);

        vaddr = dmam_alloc_coherent(&priv->pdev->dev, len, &paddr, GFP_KERNEL);
        if (!vaddr)
                return -ENOMEM;

        /* TX descriptor ring */

        ps->bd_table_vaddr = vaddr;
        ps->bd_table_paddr = paddr;
        ps->bd_table_len = len;

        ps->tx_bd_vbase = vaddr;
        ps->tx_bd_pbase = paddr;

        pr_debug("TX descriptor table: vaddr=0x%p paddr=%pad\n", vaddr, &paddr);

        priv->tx_bd_r_index = 0;
        priv->tx_bd_w_index = 0;

        /* RX descriptor ring */

        vaddr = ((struct qtnf_pearl_tx_bd *)vaddr) + priv->tx_bd_num;
        paddr += priv->tx_bd_num * sizeof(struct qtnf_pearl_tx_bd);

        ps->rx_bd_vbase = vaddr;
        ps->rx_bd_pbase = paddr;

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        writel(QTN_HOST_HI32(paddr),
               PCIE_HDP_TX_HOST_Q_BASE_H(ps->pcie_reg_base));
#endif
        writel(QTN_HOST_LO32(paddr),
               PCIE_HDP_TX_HOST_Q_BASE_L(ps->pcie_reg_base));
        writel(priv->rx_bd_num | (sizeof(struct qtnf_pearl_rx_bd)) << 16,
               PCIE_HDP_TX_HOST_Q_SZ_CTRL(ps->pcie_reg_base));

        pr_debug("RX descriptor table: vaddr=0x%p paddr=%pad\n", vaddr, &paddr);

        return 0;
}

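/* attach a freshly allocated rx skb to rx descriptor 'index' and
 * publish its DMA address via the HHBM and HDP write pointer registers
 */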
static int pearl_skb2rbd_attach(struct qtnf_pcie_pearl_state *ps, u16 index)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        struct qtnf_pearl_rx_bd *rxbd;
        struct sk_buff *skb;
        dma_addr_t paddr;

        skb = netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE);
        if (!skb) {
                priv->rx_skb[index] = NULL;
                return -ENOMEM;
        }

        priv->rx_skb[index] = skb;
        rxbd = &ps->rx_bd_vbase[index];

        paddr = dma_map_single(&priv->pdev->dev, skb->data, SKB_BUF_SIZE,
                               DMA_FROM_DEVICE);
        if (dma_mapping_error(&priv->pdev->dev, paddr)) {
                pr_err("skb DMA mapping error: %pad\n", &paddr);
                return -ENOMEM;
        }

        /* keep rx skb paddrs in rx buffer descriptors for cleanup purposes */
        rxbd->addr = cpu_to_le32(QTN_HOST_LO32(paddr));
        rxbd->addr_h = cpu_to_le32(QTN_HOST_HI32(paddr));
        rxbd->info = 0x0;

        priv->rx_bd_w_index = index;

        /* sync up all descriptor updates */
        wmb();

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        writel(QTN_HOST_HI32(paddr),
               PCIE_HDP_HHBM_BUF_PTR_H(ps->pcie_reg_base));
#endif
        writel(QTN_HOST_LO32(paddr),
               PCIE_HDP_HHBM_BUF_PTR(ps->pcie_reg_base));

        writel(index, PCIE_HDP_TX_HOST_Q_WR_PTR(ps->pcie_reg_base));
        return 0;
}

static int pearl_alloc_rx_buffers(struct qtnf_pcie_pearl_state *ps)
{
        u16 i;
        int ret = 0;

        memset(ps->rx_bd_vbase, 0x0,
               ps->base.rx_bd_num * sizeof(struct qtnf_pearl_rx_bd));

        for (i = 0; i < ps->base.rx_bd_num; i++) {
                ret = pearl_skb2rbd_attach(ps, i);
                if (ret)
                        break;
        }

        return ret;
}

/* all rx/tx activity should have ceased before calling this function */
static void qtnf_pearl_free_xfer_buffers(struct qtnf_pcie_pearl_state *ps)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        struct qtnf_pearl_tx_bd *txbd;
        struct qtnf_pearl_rx_bd *rxbd;
        struct sk_buff *skb;
        dma_addr_t paddr;
        int i;

        /* free rx buffers */
        for (i = 0; i < priv->rx_bd_num; i++) {
                if (priv->rx_skb && priv->rx_skb[i]) {
                        rxbd = &ps->rx_bd_vbase[i];
                        skb = priv->rx_skb[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
                                              le32_to_cpu(rxbd->addr));
                        dma_unmap_single(&priv->pdev->dev, paddr,
                                         SKB_BUF_SIZE, DMA_FROM_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->rx_skb[i] = NULL;
                }
        }

        /* free tx buffers */
        for (i = 0; i < priv->tx_bd_num; i++) {
                if (priv->tx_skb && priv->tx_skb[i]) {
                        txbd = &ps->tx_bd_vbase[i];
                        skb = priv->tx_skb[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
                                              le32_to_cpu(txbd->addr));
                        dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
                                         DMA_TO_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->tx_skb[i] = NULL;
                }
        }
}

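/* soft-reset the HHBM block and program its queue limit to the number
 * of RX descriptors
 */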
static int pearl_hhbm_init(struct qtnf_pcie_pearl_state *ps)
{
        u32 val;

        val = readl(PCIE_HHBM_CONFIG(ps->pcie_reg_base));
        val |= HHBM_CONFIG_SOFT_RESET;
        writel(val, PCIE_HHBM_CONFIG(ps->pcie_reg_base));
        usleep_range(50, 100);
        val &= ~HHBM_CONFIG_SOFT_RESET;
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        val |= HHBM_64BIT;
#endif
        writel(val, PCIE_HHBM_CONFIG(ps->pcie_reg_base));
        writel(ps->base.rx_bd_num, PCIE_HHBM_Q_LIMIT_REG(ps->pcie_reg_base));

        return 0;
}

static int qtnf_pcie_pearl_init_xfer(struct qtnf_pcie_pearl_state *ps,
                                     unsigned int tx_bd_size,
                                     unsigned int rx_bd_size)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        int ret;
        u32 val;

        if (tx_bd_size == 0)
                tx_bd_size = PEARL_TX_BD_SIZE_DEFAULT;

        val = tx_bd_size * sizeof(struct qtnf_pearl_tx_bd);

        if (!is_power_of_2(tx_bd_size) || val > PCIE_HHBM_MAX_SIZE) {
                pr_warn("invalid tx_bd_size value %u, use default %u\n",
                        tx_bd_size, PEARL_TX_BD_SIZE_DEFAULT);
                priv->tx_bd_num = PEARL_TX_BD_SIZE_DEFAULT;
        } else {
                priv->tx_bd_num = tx_bd_size;
        }

        if (rx_bd_size == 0)
                rx_bd_size = PEARL_RX_BD_SIZE_DEFAULT;

        val = rx_bd_size * sizeof(dma_addr_t);

        if (!is_power_of_2(rx_bd_size) || val > PCIE_HHBM_MAX_SIZE) {
                pr_warn("invalid rx_bd_size value %u, use default %u\n",
                        rx_bd_size, PEARL_RX_BD_SIZE_DEFAULT);
                priv->rx_bd_num = PEARL_RX_BD_SIZE_DEFAULT;
        } else {
                priv->rx_bd_num = rx_bd_size;
        }

        priv->rx_bd_w_index = 0;
        priv->rx_bd_r_index = 0;

        ret = pearl_hhbm_init(ps);
        if (ret) {
                pr_err("failed to init h/w queues\n");
                return ret;
        }

        ret = qtnf_pcie_alloc_skb_array(priv);
        if (ret) {
                pr_err("failed to allocate skb array\n");
                return ret;
        }

        ret = pearl_alloc_bd_table(ps);
        if (ret) {
                pr_err("failed to allocate bd table\n");
                return ret;
        }

        ret = pearl_alloc_rx_buffers(ps);
        if (ret) {
                pr_err("failed to allocate rx buffers\n");
                return ret;
        }

        return ret;
}

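/* unmap and free tx skbs completed by the HDP DMA engine, advancing
 * the TX ring read index
 */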
static void qtnf_pearl_data_tx_reclaim(struct qtnf_pcie_pearl_state *ps)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        struct qtnf_pearl_tx_bd *txbd;
        struct sk_buff *skb;
        unsigned long flags;
        dma_addr_t paddr;
        u32 tx_done_index;
        int count = 0;
        int i;

        spin_lock_irqsave(&priv->tx_reclaim_lock, flags);

        tx_done_index = readl(PCIE_HDP_RX0DMA_CNT(ps->pcie_reg_base))
                        & (priv->tx_bd_num - 1);

        i = priv->tx_bd_r_index;

        while (CIRC_CNT(tx_done_index, i, priv->tx_bd_num)) {
                skb = priv->tx_skb[i];
                if (likely(skb)) {
                        txbd = &ps->tx_bd_vbase[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
                                              le32_to_cpu(txbd->addr));
                        dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
                                         DMA_TO_DEVICE);

                        if (skb->dev) {
                                dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
                                if (unlikely(priv->tx_stopped)) {
                                        qtnf_wake_all_queues(skb->dev);
                                        priv->tx_stopped = 0;
                                }
                        }

                        dev_kfree_skb_any(skb);
                }

                priv->tx_skb[i] = NULL;
                count++;

                if (++i >= priv->tx_bd_num)
                        i = 0;
        }

        priv->tx_reclaim_done += count;
        priv->tx_reclaim_req++;
        priv->tx_bd_r_index = i;

        spin_unlock_irqrestore(&priv->tx_reclaim_lock, flags);
}

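/* return 1 if there is room in the TX ring, attempting a reclaim
 * first when the ring looks full
 */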
static int qtnf_tx_queue_ready(struct qtnf_pcie_pearl_state *ps)
{
        struct qtnf_pcie_bus_priv *priv = &ps->base;

        if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index,
                        priv->tx_bd_num)) {
                qtnf_pearl_data_tx_reclaim(ps);

                if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index,
                                priv->tx_bd_num)) {
                        pr_warn_ratelimited("reclaim full Tx queue\n");
                        priv->tx_full_count++;
                        return 0;
                }
        }

        return 1;
}

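/* map the skb for DMA, fill in a TX descriptor and pass its address
 * to the EP via the HDP host write-descriptor registers
 */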
static int qtnf_pcie_skb_send(struct qtnf_bus *bus, struct sk_buff *skb)
{
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        dma_addr_t txbd_paddr, skb_paddr;
        struct qtnf_pearl_tx_bd *txbd;
        unsigned long flags;
        int len, i;
        u32 info;
        int ret = 0;

        spin_lock_irqsave(&priv->tx_lock, flags);

        if (!qtnf_tx_queue_ready(ps)) {
                if (skb->dev) {
                        netif_tx_stop_all_queues(skb->dev);
                        priv->tx_stopped = 1;
                }

                spin_unlock_irqrestore(&priv->tx_lock, flags);
                return NETDEV_TX_BUSY;
        }

        i = priv->tx_bd_w_index;
        priv->tx_skb[i] = skb;
        len = skb->len;

        skb_paddr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
                                   DMA_TO_DEVICE);
        if (dma_mapping_error(&priv->pdev->dev, skb_paddr)) {
                pr_err("skb DMA mapping error: %pad\n", &skb_paddr);
                ret = -ENOMEM;
                goto tx_done;
        }

        txbd = &ps->tx_bd_vbase[i];
        txbd->addr = cpu_to_le32(QTN_HOST_LO32(skb_paddr));
        txbd->addr_h = cpu_to_le32(QTN_HOST_HI32(skb_paddr));

        info = (len & QTN_PCIE_TX_DESC_LEN_MASK) << QTN_PCIE_TX_DESC_LEN_SHIFT;
        txbd->info = cpu_to_le32(info);

        /* sync up all descriptor updates before passing them to EP */
        dma_wmb();

        /* write new TX descriptor to PCIE_RX_FIFO on EP */
        txbd_paddr = ps->tx_bd_pbase + i * sizeof(struct qtnf_pearl_tx_bd);

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        writel(QTN_HOST_HI32(txbd_paddr),
               PCIE_HDP_HOST_WR_DESC0_H(ps->pcie_reg_base));
#endif
        writel(QTN_HOST_LO32(txbd_paddr),
               PCIE_HDP_HOST_WR_DESC0(ps->pcie_reg_base));

        if (++i >= priv->tx_bd_num)
                i = 0;

        priv->tx_bd_w_index = i;

tx_done:
        if (ret) {
                pr_err_ratelimited("drop skb\n");
                if (skb->dev)
                        skb->dev->stats.tx_dropped++;
                dev_kfree_skb_any(skb);
        }

        priv->tx_done_count++;
        spin_unlock_irqrestore(&priv->tx_lock, flags);

        qtnf_pearl_data_tx_reclaim(ps);

        return NETDEV_TX_OK;
}

static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb,
                             unsigned int macid, unsigned int vifid)
{
        return qtnf_pcie_skb_send(bus, skb);
}

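/* append frame meta info (macid/vifid) at the skb tail before
 * sending; trimmed back off if the TX ring is busy
 */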
static int qtnf_pcie_data_tx_meta(struct qtnf_bus *bus, struct sk_buff *skb,
                                  unsigned int macid, unsigned int vifid)
{
        struct qtnf_frame_meta_info *meta;
        int tail_need = sizeof(*meta) - skb_tailroom(skb);
        int ret;

        if (tail_need > 0 && pskb_expand_head(skb, 0, tail_need, GFP_ATOMIC)) {
                skb->dev->stats.tx_dropped++;
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

        meta = skb_put(skb, sizeof(*meta));
        meta->magic_s = HBM_FRAME_META_MAGIC_PATTERN_S;
        meta->magic_e = HBM_FRAME_META_MAGIC_PATTERN_E;
        meta->macid = macid;
        meta->ifidx = vifid;

        ret = qtnf_pcie_skb_send(bus, skb);
        if (unlikely(ret == NETDEV_TX_BUSY))
                __skb_trim(skb, skb->len - sizeof(*meta));

        return ret;
}

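/* top half: ack the HDP interrupt status, kick the SHM IPC handlers,
 * then defer RX processing to NAPI and TX reclaim to the tasklet
 */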
static irqreturn_t qtnf_pcie_pearl_interrupt(int irq, void *data)
{
        struct qtnf_bus *bus = (struct qtnf_bus *)data;
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        u32 status;

        priv->pcie_irq_count++;
        status = readl(PCIE_HDP_INT_STATUS(ps->pcie_reg_base));

        qtnf_shm_ipc_irq_handler(&priv->shm_ipc_ep_in);
        qtnf_shm_ipc_irq_handler(&priv->shm_ipc_ep_out);

        if (!(status & ps->pcie_irq_mask))
                goto irq_done;

        if (status & PCIE_HDP_INT_RX_BITS)
                ps->pcie_irq_rx_count++;

        if (status & PCIE_HDP_INT_TX_BITS)
                ps->pcie_irq_tx_count++;

        if (status & PCIE_HDP_INT_HHBM_UF)
                ps->pcie_irq_uf_count++;

        if (status & PCIE_HDP_INT_RX_BITS) {
                qtnf_dis_rxdone_irq(ps);
                napi_schedule(&bus->mux_napi);
        }

        if (status & PCIE_HDP_INT_TX_BITS) {
                qtnf_dis_txdone_irq(ps);
                tasklet_hi_schedule(&priv->reclaim_tq);
        }

irq_done:
        /* H/W workaround: clear all status bits, not only the enabled ones */
        qtnf_non_posted_write(~0U, PCIE_HDP_INT_STATUS(ps->pcie_reg_base));

        if (!priv->msi_enabled)
                qtnf_deassert_intx(ps);

        return IRQ_HANDLED;
}

static int qtnf_rx_data_ready(struct qtnf_pcie_pearl_state *ps)
{
        u16 index = ps->base.rx_bd_r_index;
        struct qtnf_pearl_rx_bd *rxbd;
        u32 descw;

        rxbd = &ps->rx_bd_vbase[index];
        descw = le32_to_cpu(rxbd->info);

        if (descw & QTN_TXDONE_MASK)
                return 1;

        return 0;
}

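/* NAPI poll: consume completed RX descriptors, pass the skbs up the
 * stack and refill the ring with fresh buffers
 */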
static int qtnf_pcie_pearl_rx_poll(struct napi_struct *napi, int budget)
{
        struct qtnf_bus *bus = container_of(napi, struct qtnf_bus, mux_napi);
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        struct qtnf_pcie_bus_priv *priv = &ps->base;
        struct net_device *ndev = NULL;
        struct sk_buff *skb = NULL;
        int processed = 0;
        struct qtnf_pearl_rx_bd *rxbd;
        dma_addr_t skb_paddr;
        int consume;
        u32 descw;
        u32 psize;
        u16 r_idx;
        u16 w_idx;
        int ret;

        while (processed < budget) {
                if (!qtnf_rx_data_ready(ps))
                        goto rx_out;

                r_idx = priv->rx_bd_r_index;
                rxbd = &ps->rx_bd_vbase[r_idx];
                descw = le32_to_cpu(rxbd->info);

                skb = priv->rx_skb[r_idx];
                psize = QTN_GET_LEN(descw);
                consume = 1;

                if (!(descw & QTN_TXDONE_MASK)) {
                        pr_warn("skip invalid rxbd[%d]\n", r_idx);
                        consume = 0;
                }

                if (!skb) {
                        pr_warn("skip missing rx_skb[%d]\n", r_idx);
                        consume = 0;
                }

                if (skb && (skb_tailroom(skb) <  psize)) {
                        pr_err("skip packet with invalid length: %u > %u\n",
                               psize, skb_tailroom(skb));
                        consume = 0;
                }

                if (skb) {
                        skb_paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
                                                  le32_to_cpu(rxbd->addr));
                        dma_unmap_single(&priv->pdev->dev, skb_paddr,
                                         SKB_BUF_SIZE, DMA_FROM_DEVICE);
                }

                if (consume) {
                        skb_put(skb, psize);
                        ndev = qtnf_classify_skb(bus, skb);
                        if (likely(ndev)) {
                                dev_sw_netstats_rx_add(ndev, skb->len);
                                skb->protocol = eth_type_trans(skb, ndev);
                                napi_gro_receive(napi, skb);
                        } else {
                                pr_debug("drop untagged skb\n");
                                bus->mux_dev->stats.rx_dropped++;
                                dev_kfree_skb_any(skb);
                        }
                } else {
                        if (skb) {
                                bus->mux_dev->stats.rx_dropped++;
                                dev_kfree_skb_any(skb);
                        }
                }

                priv->rx_skb[r_idx] = NULL;
                if (++r_idx >= priv->rx_bd_num)
                        r_idx = 0;

                priv->rx_bd_r_index = r_idx;

                /* replace the processed buffer with a new one */
                w_idx = priv->rx_bd_w_index;
                while (CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
                                  priv->rx_bd_num) > 0) {
                        if (++w_idx >= priv->rx_bd_num)
                                w_idx = 0;

                        ret = pearl_skb2rbd_attach(ps, w_idx);
                        if (ret) {
                                pr_err("failed to allocate new rx_skb[%d]\n",
                                       w_idx);
                                break;
                        }
                }

                processed++;
        }

rx_out:
        if (processed < budget) {
                napi_complete(napi);
                qtnf_en_rxdone_irq(ps);
        }

        return processed;
}

static void
qtnf_pcie_data_tx_timeout(struct qtnf_bus *bus, struct net_device *ndev)
{
        struct qtnf_pcie_pearl_state *ps = (void *)get_bus_priv(bus);

        tasklet_hi_schedule(&ps->base.reclaim_tq);
}

static void qtnf_pcie_data_rx_start(struct qtnf_bus *bus)
{
        struct qtnf_pcie_pearl_state *ps = (void *)get_bus_priv(bus);

        qtnf_enable_hdp_irqs(ps);
        napi_enable(&bus->mux_napi);
}

static void qtnf_pcie_data_rx_stop(struct qtnf_bus *bus)
{
        struct qtnf_pcie_pearl_state *ps = (void *)get_bus_priv(bus);

        napi_disable(&bus->mux_napi);
        qtnf_disable_hdp_irqs(ps);
}

static void qtnf_pearl_tx_use_meta_info_set(struct qtnf_bus *bus, bool use_meta)
{
        if (use_meta)
                bus->bus_ops->data_tx = qtnf_pcie_data_tx_meta;
        else
                bus->bus_ops->data_tx = qtnf_pcie_data_tx;
}

static struct qtnf_bus_ops qtnf_pcie_pearl_bus_ops = {
        /* control path methods */
        .control_tx     = qtnf_pcie_control_tx,

        /* data path methods */
        .data_tx                = qtnf_pcie_data_tx,
        .data_tx_timeout        = qtnf_pcie_data_tx_timeout,
        .data_tx_use_meta_set   = qtnf_pearl_tx_use_meta_info_set,
        .data_rx_start          = qtnf_pcie_data_rx_start,
        .data_rx_stop           = qtnf_pcie_data_rx_stop,
};

static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
{
        struct qtnf_bus *bus = dev_get_drvdata(s->private);
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        u32 reg = readl(PCIE_HDP_INT_EN(ps->pcie_reg_base));
        u32 status;

        seq_printf(s, "pcie_irq_count(%u)\n", ps->base.pcie_irq_count);
        seq_printf(s, "pcie_irq_tx_count(%u)\n", ps->pcie_irq_tx_count);
        status = reg &  PCIE_HDP_INT_TX_BITS;
        seq_printf(s, "pcie_irq_tx_status(%s)\n",
                   (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
        seq_printf(s, "pcie_irq_rx_count(%u)\n", ps->pcie_irq_rx_count);
        status = reg &  PCIE_HDP_INT_RX_BITS;
        seq_printf(s, "pcie_irq_rx_status(%s)\n",
                   (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
        seq_printf(s, "pcie_irq_uf_count(%u)\n", ps->pcie_irq_uf_count);
        status = reg &  PCIE_HDP_INT_HHBM_UF;
        seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
                   (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");

        return 0;
}

static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
{
        struct qtnf_bus *bus = dev_get_drvdata(s->private);
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        struct qtnf_pcie_bus_priv *priv = &ps->base;

        seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
        seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
        seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
        seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);

        seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
        seq_printf(s, "tx_bd_p_index(%u)\n",
                   readl(PCIE_HDP_RX0DMA_CNT(ps->pcie_reg_base))
                        & (priv->tx_bd_num - 1));
        seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
        seq_printf(s, "tx queue len(%u)\n",
                   CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
                            priv->tx_bd_num));

        seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
        seq_printf(s, "rx_bd_p_index(%u)\n",
                   readl(PCIE_HDP_TX0DMA_CNT(ps->pcie_reg_base))
                        & (priv->rx_bd_num - 1));
        seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
        seq_printf(s, "rx alloc queue len(%u)\n",
                   CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
                              priv->rx_bd_num));

        return 0;
}

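/* send one firmware block to the EP, prepended with a
 * qtnf_pearl_fw_hdr; returns the payload length sent, or 0 on failure
 */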
static int qtnf_ep_fw_send(struct pci_dev *pdev, uint32_t size,
                           int blk, const u8 *pblk, const u8 *fw)
{
        struct qtnf_bus *bus = pci_get_drvdata(pdev);
        struct qtnf_pearl_fw_hdr *hdr;
        u8 *pdata;

        int hds = sizeof(*hdr);
        struct sk_buff *skb = NULL;
        int len = 0;
        int ret;

        skb = __dev_alloc_skb(QTN_PCIE_FW_BUFSZ, GFP_KERNEL);
        if (!skb)
                return -ENOMEM;

        skb->len = QTN_PCIE_FW_BUFSZ;
        skb->dev = NULL;

        hdr = (struct qtnf_pearl_fw_hdr *)skb->data;
        memcpy(hdr->boardflg, QTN_PCIE_BOARDFLG, strlen(QTN_PCIE_BOARDFLG));
        hdr->fwsize = cpu_to_le32(size);
        hdr->seqnum = cpu_to_le32(blk);

        if (blk)
                hdr->type = cpu_to_le32(QTN_FW_DSUB);
        else
                hdr->type = cpu_to_le32(QTN_FW_DBEGIN);

        pdata = skb->data + hds;

        len = QTN_PCIE_FW_BUFSZ - hds;
        if (pblk >= (fw + size - len)) {
                len = fw + size - pblk;
                hdr->type = cpu_to_le32(QTN_FW_DEND);
        }

        hdr->pktlen = cpu_to_le32(len);
        memcpy(pdata, pblk, len);
        hdr->crc = cpu_to_le32(~crc32(0, pdata, len));

        ret = qtnf_pcie_skb_send(bus, skb);

        return (ret == NETDEV_TX_OK) ? len : 0;
}

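/* upload the firmware image block by block, syncing with the EP
 * periodically and on the last block, and rewinding when the EP
 * requests a retry
 */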
static int
qtnf_ep_fw_load(struct qtnf_pcie_pearl_state *ps, const u8 *fw, u32 fw_size)
{
        int blk_size = QTN_PCIE_FW_BUFSZ - sizeof(struct qtnf_pearl_fw_hdr);
        int blk_count = fw_size / blk_size + ((fw_size % blk_size) ? 1 : 0);
        const u8 *pblk = fw;
        int threshold = 0;
        int blk = 0;
        int len;

        pr_debug("FW upload started: fw_addr=0x%p size=%d\n", fw, fw_size);

        while (blk < blk_count) {
                if (++threshold > 10000) {
                        pr_err("FW upload failed: too many retries\n");
                        return -ETIMEDOUT;
                }

                len = qtnf_ep_fw_send(ps->base.pdev, fw_size, blk, pblk, fw);
                if (len <= 0)
                        continue;

                if (!((blk + 1) & QTN_PCIE_FW_DLMASK) ||
                    (blk == (blk_count - 1))) {
                        qtnf_set_state(&ps->bda->bda_rc_state,
                                       QTN_RC_FW_SYNC);
                        if (qtnf_poll_state(&ps->bda->bda_ep_state,
                                            QTN_EP_FW_SYNC,
                                            QTN_FW_DL_TIMEOUT_MS)) {
                                pr_err("FW upload failed: SYNC timed out\n");
                                return -ETIMEDOUT;
                        }

                        qtnf_clear_state(&ps->bda->bda_ep_state,
                                         QTN_EP_FW_SYNC);

                        if (qtnf_is_state(&ps->bda->bda_ep_state,
                                          QTN_EP_FW_RETRY)) {
                                if (blk == (blk_count - 1)) {
                                        int last_round =
                                                blk_count & QTN_PCIE_FW_DLMASK;
                                        blk -= last_round;
                                        pblk -= ((last_round - 1) *
                                                blk_size + len);
                                } else {
                                        blk -= QTN_PCIE_FW_DLMASK;
                                        pblk -= QTN_PCIE_FW_DLMASK * blk_size;
                                }

                                qtnf_clear_state(&ps->bda->bda_ep_state,
                                                 QTN_EP_FW_RETRY);

                                pr_warn("FW upload retry: block #%d\n", blk);
                                continue;
                        }

                        qtnf_pearl_data_tx_reclaim(ps);
                }

                pblk += len;
                blk++;
        }

        pr_debug("FW upload completed: totally sent %d blocks\n", blk);
        return 0;
}

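/* firmware download work: upload the image (or boot from flash), wait
 * for EP bringup and qlink readiness, then complete bus initialization
 */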
static void qtnf_pearl_fw_work_handler(struct work_struct *work)
{
        struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
        struct qtnf_pcie_pearl_state *ps = (void *)get_bus_priv(bus);
        u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
        const char *fwname = QTN_PCI_PEARL_FW_NAME;
        struct pci_dev *pdev = ps->base.pdev;
        const struct firmware *fw;
        int ret;

        if (ps->base.flashboot) {
                state |= QTN_RC_FW_FLASHBOOT;
        } else {
                ret = request_firmware(&fw, fwname, &pdev->dev);
                if (ret < 0) {
                        pr_err("failed to get firmware %s\n", fwname);
                        goto fw_load_exit;
                }
        }

        qtnf_set_state(&ps->bda->bda_rc_state, state);

        if (qtnf_poll_state(&ps->bda->bda_ep_state, QTN_EP_FW_LOADRDY,
                            QTN_FW_DL_TIMEOUT_MS)) {
                pr_err("card is not ready\n");

                if (!ps->base.flashboot)
                        release_firmware(fw);

                goto fw_load_exit;
        }

        qtnf_clear_state(&ps->bda->bda_ep_state, QTN_EP_FW_LOADRDY);

        if (ps->base.flashboot) {
                pr_info("booting firmware from flash\n");

        } else {
                pr_info("starting firmware upload: %s\n", fwname);

                ret = qtnf_ep_fw_load(ps, fw->data, fw->size);
                release_firmware(fw);
                if (ret) {
                        pr_err("firmware upload error\n");
                        goto fw_load_exit;
                }
        }

        if (qtnf_poll_state(&ps->bda->bda_ep_state, QTN_EP_FW_DONE,
                            QTN_FW_DL_TIMEOUT_MS)) {
                pr_err("firmware bringup timed out\n");
                goto fw_load_exit;
        }

        if (qtnf_poll_state(&ps->bda->bda_ep_state,
                            QTN_EP_FW_QLINK_DONE, QTN_FW_QLINK_TIMEOUT_MS)) {
                pr_err("firmware runtime failure\n");
                goto fw_load_exit;
        }

        pr_info("firmware is up and running\n");

        ret = qtnf_pcie_fw_boot_done(bus);
        if (ret)
                goto fw_load_exit;

        qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
        qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);

fw_load_exit:
        put_device(&pdev->dev);
}

static void qtnf_pearl_reclaim_tasklet_fn(struct tasklet_struct *t)
{
        struct qtnf_pcie_pearl_state *ps = from_tasklet(ps, t, base.reclaim_tq);

        qtnf_pearl_data_tx_reclaim(ps);
        qtnf_en_txdone_irq(ps);
}

static u64 qtnf_pearl_dma_mask_get(void)
{
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        return DMA_BIT_MASK(64);
#else
        return DMA_BIT_MASK(32);
#endif
}

static int qtnf_pcie_pearl_probe(struct qtnf_bus *bus, unsigned int tx_bd_size,
                                 unsigned int rx_bd_size)
{
        struct qtnf_shm_ipc_int ipc_int;
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);
        struct pci_dev *pdev = ps->base.pdev;
        int ret;

        bus->bus_ops = &qtnf_pcie_pearl_bus_ops;
        spin_lock_init(&ps->irq_lock);
        INIT_WORK(&bus->fw_work, qtnf_pearl_fw_work_handler);

        ps->pcie_reg_base = ps->base.dmareg_bar;
        ps->bda = ps->base.epmem_bar;
        writel(ps->base.msi_enabled, &ps->bda->bda_rc_msi_enabled);

        ret = qtnf_pcie_pearl_init_xfer(ps, tx_bd_size, rx_bd_size);
        if (ret) {
                pr_err("PCIE xfer init failed\n");
                return ret;
        }

        /* init default irq settings */
        qtnf_init_hdp_irqs(ps);

        /* start with disabled irqs */
        qtnf_disable_hdp_irqs(ps);

        ret = devm_request_irq(&pdev->dev, pdev->irq,
                               &qtnf_pcie_pearl_interrupt, 0,
                               "qtnf_pearl_irq", (void *)bus);
        if (ret) {
                pr_err("failed to request pcie irq %d\n", pdev->irq);
                qtnf_pearl_free_xfer_buffers(ps);
                return ret;
        }

        tasklet_setup(&ps->base.reclaim_tq, qtnf_pearl_reclaim_tasklet_fn);
        netif_napi_add_weight(bus->mux_dev, &bus->mux_napi,
                              qtnf_pcie_pearl_rx_poll, 10);

        ipc_int.fn = qtnf_pcie_pearl_ipc_gen_ep_int;
        ipc_int.arg = ps;
        qtnf_pcie_init_shm_ipc(&ps->base, &ps->bda->bda_shm_reg1,
                               &ps->bda->bda_shm_reg2, &ipc_int);

        return 0;
}

static void qtnf_pcie_pearl_remove(struct qtnf_bus *bus)
{
        struct qtnf_pcie_pearl_state *ps = get_bus_priv(bus);

        qtnf_pearl_reset_ep(ps);
        qtnf_pearl_free_xfer_buffers(ps);
}

#ifdef CONFIG_PM_SLEEP
static int qtnf_pcie_pearl_suspend(struct qtnf_bus *bus)
{
        return -EOPNOTSUPP;
}

static int qtnf_pcie_pearl_resume(struct qtnf_bus *bus)
{
        return 0;
}
#endif

struct qtnf_bus *qtnf_pcie_pearl_alloc(struct pci_dev *pdev)
{
        struct qtnf_bus *bus;
        struct qtnf_pcie_pearl_state *ps;

        bus = devm_kzalloc(&pdev->dev, sizeof(*bus) + sizeof(*ps), GFP_KERNEL);
        if (!bus)
                return NULL;

        ps = get_bus_priv(bus);
        ps->base.probe_cb = qtnf_pcie_pearl_probe;
        ps->base.remove_cb = qtnf_pcie_pearl_remove;
        ps->base.dma_mask_get_cb = qtnf_pearl_dma_mask_get;
#ifdef CONFIG_PM_SLEEP
        ps->base.resume_cb = qtnf_pcie_pearl_resume;
        ps->base.suspend_cb = qtnf_pcie_pearl_suspend;
#endif

        return bus;
}