drivers/dma/owl-dma.c
// SPDX-License-Identifier: GPL-2.0+
//
// Actions Semi Owl SoCs DMA driver
//
// Copyright (c) 2014 Actions Semi Inc.
// Author: David Liu <liuwei@actions-semi.com>
//
// Copyright (c) 2018 Linaro Ltd.
// Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>

#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_dma.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include "virt-dma.h"

#define OWL_DMA_FRAME_MAX_LENGTH                0xfffff

/* Global DMA Controller Registers */
#define OWL_DMA_IRQ_PD0                         0x00
#define OWL_DMA_IRQ_PD1                         0x04
#define OWL_DMA_IRQ_PD2                         0x08
#define OWL_DMA_IRQ_PD3                         0x0C
#define OWL_DMA_IRQ_EN0                         0x10
#define OWL_DMA_IRQ_EN1                         0x14
#define OWL_DMA_IRQ_EN2                         0x18
#define OWL_DMA_IRQ_EN3                         0x1C
#define OWL_DMA_SECURE_ACCESS_CTL               0x20
#define OWL_DMA_NIC_QOS                         0x24
#define OWL_DMA_DBGSEL                          0x28
#define OWL_DMA_IDLE_STAT                       0x2C

/* Channel Registers */
#define OWL_DMA_CHAN_BASE(i)                    (0x100 + (i) * 0x100)
#define OWL_DMAX_MODE                           0x00
#define OWL_DMAX_SOURCE                         0x04
#define OWL_DMAX_DESTINATION                    0x08
#define OWL_DMAX_FRAME_LEN                      0x0C
#define OWL_DMAX_FRAME_CNT                      0x10
#define OWL_DMAX_REMAIN_FRAME_CNT               0x14
#define OWL_DMAX_REMAIN_CNT                     0x18
#define OWL_DMAX_SOURCE_STRIDE                  0x1C
#define OWL_DMAX_DESTINATION_STRIDE             0x20
#define OWL_DMAX_START                          0x24
#define OWL_DMAX_PAUSE                          0x28
#define OWL_DMAX_CHAINED_CTL                    0x2C
#define OWL_DMAX_CONSTANT                       0x30
#define OWL_DMAX_LINKLIST_CTL                   0x34
#define OWL_DMAX_NEXT_DESCRIPTOR                0x38
#define OWL_DMAX_CURRENT_DESCRIPTOR_NUM         0x3C
#define OWL_DMAX_INT_CTL                        0x40
#define OWL_DMAX_INT_STATUS                     0x44
#define OWL_DMAX_CURRENT_SOURCE_POINTER         0x48
#define OWL_DMAX_CURRENT_DESTINATION_POINTER    0x4C

/* OWL_DMAX_MODE Bits */
#define OWL_DMA_MODE_TS(x)                      (((x) & GENMASK(5, 0)) << 0)
#define OWL_DMA_MODE_ST(x)                      (((x) & GENMASK(1, 0)) << 8)
#define OWL_DMA_MODE_ST_DEV                     OWL_DMA_MODE_ST(0)
#define OWL_DMA_MODE_ST_DCU                     OWL_DMA_MODE_ST(2)
#define OWL_DMA_MODE_ST_SRAM                    OWL_DMA_MODE_ST(3)
#define OWL_DMA_MODE_DT(x)                      (((x) & GENMASK(1, 0)) << 10)
#define OWL_DMA_MODE_DT_DEV                     OWL_DMA_MODE_DT(0)
#define OWL_DMA_MODE_DT_DCU                     OWL_DMA_MODE_DT(2)
#define OWL_DMA_MODE_DT_SRAM                    OWL_DMA_MODE_DT(3)
#define OWL_DMA_MODE_SAM(x)                     (((x) & GENMASK(1, 0)) << 16)
#define OWL_DMA_MODE_SAM_CONST                  OWL_DMA_MODE_SAM(0)
#define OWL_DMA_MODE_SAM_INC                    OWL_DMA_MODE_SAM(1)
#define OWL_DMA_MODE_SAM_STRIDE                 OWL_DMA_MODE_SAM(2)
#define OWL_DMA_MODE_DAM(x)                     (((x) & GENMASK(1, 0)) << 18)
#define OWL_DMA_MODE_DAM_CONST                  OWL_DMA_MODE_DAM(0)
#define OWL_DMA_MODE_DAM_INC                    OWL_DMA_MODE_DAM(1)
#define OWL_DMA_MODE_DAM_STRIDE                 OWL_DMA_MODE_DAM(2)
#define OWL_DMA_MODE_PW(x)                      (((x) & GENMASK(2, 0)) << 20)
#define OWL_DMA_MODE_CB                         BIT(23)
#define OWL_DMA_MODE_NDDBW(x)                   (((x) & 0x1) << 28)
#define OWL_DMA_MODE_NDDBW_32BIT                OWL_DMA_MODE_NDDBW(0)
#define OWL_DMA_MODE_NDDBW_8BIT                 OWL_DMA_MODE_NDDBW(1)
#define OWL_DMA_MODE_CFE                        BIT(29)
#define OWL_DMA_MODE_LME                        BIT(30)
#define OWL_DMA_MODE_CME                        BIT(31)

/* OWL_DMAX_LINKLIST_CTL Bits */
#define OWL_DMA_LLC_SAV(x)                      (((x) & GENMASK(1, 0)) << 8)
#define OWL_DMA_LLC_SAV_INC                     OWL_DMA_LLC_SAV(0)
#define OWL_DMA_LLC_SAV_LOAD_NEXT               OWL_DMA_LLC_SAV(1)
#define OWL_DMA_LLC_SAV_LOAD_PREV               OWL_DMA_LLC_SAV(2)
#define OWL_DMA_LLC_DAV(x)                      (((x) & GENMASK(1, 0)) << 10)
#define OWL_DMA_LLC_DAV_INC                     OWL_DMA_LLC_DAV(0)
#define OWL_DMA_LLC_DAV_LOAD_NEXT               OWL_DMA_LLC_DAV(1)
#define OWL_DMA_LLC_DAV_LOAD_PREV               OWL_DMA_LLC_DAV(2)
#define OWL_DMA_LLC_SUSPEND                     BIT(16)

/* OWL_DMAX_INT_CTL Bits */
#define OWL_DMA_INTCTL_BLOCK                    BIT(0)
#define OWL_DMA_INTCTL_SUPER_BLOCK              BIT(1)
#define OWL_DMA_INTCTL_FRAME                    BIT(2)
#define OWL_DMA_INTCTL_HALF_FRAME               BIT(3)
#define OWL_DMA_INTCTL_LAST_FRAME               BIT(4)

/* OWL_DMAX_INT_STATUS Bits */
#define OWL_DMA_INTSTAT_BLOCK                   BIT(0)
#define OWL_DMA_INTSTAT_SUPER_BLOCK             BIT(1)
#define OWL_DMA_INTSTAT_FRAME                   BIT(2)
#define OWL_DMA_INTSTAT_HALF_FRAME              BIT(3)
#define OWL_DMA_INTSTAT_LAST_FRAME              BIT(4)

/* Extract a 'width'-bit field of 'val' at 'shift' and place it at 'newshift' */
#define BIT_FIELD(val, width, shift, newshift)  \
                ((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift))
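
/*
 * Example: BIT_FIELD(0x0345, 4, 8, 16) takes the 4-bit field at bits
 * [11:8] of 0x0345 (i.e. 0x3) and re-places it at bit 16, yielding
 * 0x00030000.
 */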

/* Frame count value is fixed as 1 */
#define FCNT_VAL                                0x1

/**
 * enum owl_dmadesc_offsets - Word offsets within the hardware link-list
 * descriptor used for DMA transfers
 * @OWL_DMADESC_NEXT_LLI: physical address of the next link list
 * @OWL_DMADESC_SADDR: source physical address
 * @OWL_DMADESC_DADDR: destination physical address
 * @OWL_DMADESC_FLEN: frame length
 * @OWL_DMADESC_SRC_STRIDE: source stride
 * @OWL_DMADESC_DST_STRIDE: destination stride
 * @OWL_DMADESC_CTRLA: dma_mode and linklist ctrl config
 * @OWL_DMADESC_CTRLB: interrupt config
 * @OWL_DMADESC_CONST_NUM: data for constant fill
 * @OWL_DMADESC_SIZE: number of 32-bit words in the descriptor
 */
enum owl_dmadesc_offsets {
        OWL_DMADESC_NEXT_LLI = 0,
        OWL_DMADESC_SADDR,
        OWL_DMADESC_DADDR,
        OWL_DMADESC_FLEN,
        OWL_DMADESC_SRC_STRIDE,
        OWL_DMADESC_DST_STRIDE,
        OWL_DMADESC_CTRLA,
        OWL_DMADESC_CTRLB,
        OWL_DMADESC_CONST_NUM,
        OWL_DMADESC_SIZE
};

enum owl_dma_id {
        S900_DMA,
        S700_DMA,
};

/**
 * struct owl_dma_lli - Link list for dma transfer
 * @hw: hardware link list
 * @phys: physical address of hardware link list
 * @node: node for txd's lli_list
 */
struct owl_dma_lli {
        u32                     hw[OWL_DMADESC_SIZE];
        dma_addr_t              phys;
        struct list_head        node;
};

/**
 * struct owl_dma_txd - Wrapper for struct dma_async_tx_descriptor
 * @vd: virtual DMA descriptor
 * @lli_list: link list of lli nodes
 * @cyclic: flag to indicate cyclic transfers
 */
struct owl_dma_txd {
        struct virt_dma_desc    vd;
        struct list_head        lli_list;
        bool                    cyclic;
};

/**
 * struct owl_dma_pchan - Holder for the physical channels
 * @id: physical index to this channel
 * @base: virtual memory base for the dma channel
 * @vchan: the virtual channel currently being served by this physical channel
 */
struct owl_dma_pchan {
        u32                     id;
        void __iomem            *base;
        struct owl_dma_vchan    *vchan;
};

/**
 * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
 * @vc: wrapped virtual channel
 * @pchan: the physical channel utilized by this channel
 * @txd: active transaction on this channel
 * @cfg: slave configuration for this channel
 * @drq: physical DMA request ID for this channel
 */
struct owl_dma_vchan {
        struct virt_dma_chan    vc;
        struct owl_dma_pchan    *pchan;
        struct owl_dma_txd      *txd;
        struct dma_slave_config cfg;
        u8                      drq;
};

/**
 * struct owl_dma - Holder for the Owl DMA controller
 * @dma: dma engine for this instance
 * @base: virtual memory base for the DMA controller
 * @clk: clock for the DMA controller
 * @lock: a lock to use when changing the DMA controller's global registers
 * @lli_pool: a pool for the LLI descriptors
 * @irq: interrupt ID for the DMA controller
 * @nr_pchans: the number of physical channels
 * @pchans: array of data for the physical channels
 * @nr_vchans: the number of virtual channels
 * @vchans: array of data for the virtual channels
 * @devid: device id based on OWL SoC
 */
struct owl_dma {
        struct dma_device       dma;
        void __iomem            *base;
        struct clk              *clk;
        spinlock_t              lock;
        struct dma_pool         *lli_pool;
        int                     irq;

        unsigned int            nr_pchans;
        struct owl_dma_pchan    *pchans;

        unsigned int            nr_vchans;
        struct owl_dma_vchan    *vchans;
        enum owl_dma_id         devid;
};

static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
                         u32 val, bool state)
{
        u32 regval;

        regval = readl(pchan->base + reg);

        if (state)
                regval |= val;
        else
                regval &= ~val;

        writel(regval, pchan->base + reg);
}

static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data)
{
        writel(data, pchan->base + reg);
}

static u32 pchan_readl(struct owl_dma_pchan *pchan, u32 reg)
{
        return readl(pchan->base + reg);
}

static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state)
{
        u32 regval;

        regval = readl(od->base + reg);

        if (state)
                regval |= val;
        else
                regval &= ~val;

        writel(regval, od->base + reg);
}

static void dma_writel(struct owl_dma *od, u32 reg, u32 data)
{
        writel(data, od->base + reg);
}

static u32 dma_readl(struct owl_dma *od, u32 reg)
{
        return readl(od->base + reg);
}

static inline struct owl_dma *to_owl_dma(struct dma_device *dd)
{
        return container_of(dd, struct owl_dma, dma);
}

static struct device *chan2dev(struct dma_chan *chan)
{
        return &chan->dev->device;
}

static inline struct owl_dma_vchan *to_owl_vchan(struct dma_chan *chan)
{
        return container_of(chan, struct owl_dma_vchan, vc.chan);
}

static inline struct owl_dma_txd *to_owl_txd(struct dma_async_tx_descriptor *tx)
{
        return container_of(tx, struct owl_dma_txd, vd.tx);
}

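/*
 * Repack the OWL_DMAX_MODE and OWL_DMAX_LINKLIST_CTL register fields into
 * the layout the hardware expects in the descriptor's ctrla word. Per the
 * BIT_FIELD() calls below:
 *
 *   mode[31:28]    -> ctrla[31:28]   (NDDBW/CFE/LME/CME)
 *   mode[23:16]    -> ctrla[27:20]   (SAM/DAM/PW/CB)
 *   mode[11:8]     -> ctrla[19:16]   (ST/DT)
 *   mode[5:0]      -> ctrla[15:10]   (TS)
 *   llc_ctl[11:10] -> ctrla[9:8]     (DAV)
 *   llc_ctl[9:8]   -> ctrla[7:6]     (SAV)
 */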
static inline u32 llc_hw_ctrla(u32 mode, u32 llc_ctl)
{
        u32 ctl;

        ctl = BIT_FIELD(mode, 4, 28, 28) |
              BIT_FIELD(mode, 8, 16, 20) |
              BIT_FIELD(mode, 4, 8, 16) |
              BIT_FIELD(mode, 6, 0, 10) |
              BIT_FIELD(llc_ctl, 2, 10, 8) |
              BIT_FIELD(llc_ctl, 2, 8, 6);

        return ctl;
}

static inline u32 llc_hw_ctrlb(u32 int_ctl)
{
        u32 ctl;

        /*
         * Irrespective of the SoC, the ctrlb value starts filling from
         * bit 18.
         */
        ctl = BIT_FIELD(int_ctl, 7, 0, 18);

        return ctl;
}

static u32 llc_hw_flen(struct owl_dma_lli *lli)
{
        return lli->hw[OWL_DMADESC_FLEN] & GENMASK(19, 0);
}

static void owl_dma_free_lli(struct owl_dma *od,
                             struct owl_dma_lli *lli)
{
        list_del(&lli->node);
        dma_pool_free(od->lli_pool, lli, lli->phys);
}

static struct owl_dma_lli *owl_dma_alloc_lli(struct owl_dma *od)
{
        struct owl_dma_lli *lli;
        dma_addr_t phys;

        lli = dma_pool_alloc(od->lli_pool, GFP_NOWAIT, &phys);
        if (!lli)
                return NULL;

        INIT_LIST_HEAD(&lli->node);
        lli->phys = phys;

        return lli;
}

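/*
 * Append 'next' to the txd's software list (unless we are closing a cyclic
 * chain) and hardware-link it after 'prev': prev's NEXT_LLI word points at
 * next's physical address, and prev's LME bit (repacked into ctrla) tells
 * the controller to keep fetching descriptors.
 */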
static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd,
                                           struct owl_dma_lli *prev,
                                           struct owl_dma_lli *next,
                                           bool is_cyclic)
{
        if (!is_cyclic)
                list_add_tail(&next->node, &txd->lli_list);

        if (prev) {
                prev->hw[OWL_DMADESC_NEXT_LLI] = next->phys;
                prev->hw[OWL_DMADESC_CTRLA] |=
                                        llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
        }

        return next;
}

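/*
 * Fill in one hardware descriptor for a single frame of up to
 * OWL_DMA_FRAME_MAX_LENGTH bytes. Descriptor word sharing differs per SoC
 * (see the comments in the body below):
 *
 *   S900 FLEN word:  [31:20] frame count | [19:0] frame length
 *   S700 FLEN word:                        [19:0] frame length
 *   S700 CTRLB word: [31:12] ctrlb       | [11:0] frame count
 */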
static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan,
                                  struct owl_dma_lli *lli,
                                  dma_addr_t src, dma_addr_t dst,
                                  u32 len, enum dma_transfer_direction dir,
                                  struct dma_slave_config *sconfig,
                                  bool is_cyclic)
{
        struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
        u32 mode, ctrlb;

        mode = OWL_DMA_MODE_PW(0);

        switch (dir) {
        case DMA_MEM_TO_MEM:
                mode |= OWL_DMA_MODE_TS(0) | OWL_DMA_MODE_ST_DCU |
                        OWL_DMA_MODE_DT_DCU | OWL_DMA_MODE_SAM_INC |
                        OWL_DMA_MODE_DAM_INC;

                break;
        case DMA_MEM_TO_DEV:
                mode |= OWL_DMA_MODE_TS(vchan->drq)
                        | OWL_DMA_MODE_ST_DCU | OWL_DMA_MODE_DT_DEV
                        | OWL_DMA_MODE_SAM_INC | OWL_DMA_MODE_DAM_CONST;

                /*
                 * Hardware only supports 32bit and 8bit buswidth. Since the
                 * default is 32bit, select 8bit only when requested.
                 */
                if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE)
                        mode |= OWL_DMA_MODE_NDDBW_8BIT;

                break;
        case DMA_DEV_TO_MEM:
                mode |= OWL_DMA_MODE_TS(vchan->drq)
                        | OWL_DMA_MODE_ST_DEV | OWL_DMA_MODE_DT_DCU
                        | OWL_DMA_MODE_SAM_CONST | OWL_DMA_MODE_DAM_INC;

                /*
                 * Hardware only supports 32bit and 8bit buswidth. Since the
                 * default is 32bit, select 8bit only when requested.
                 */
                if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE)
                        mode |= OWL_DMA_MODE_NDDBW_8BIT;

                break;
        default:
                return -EINVAL;
        }

        lli->hw[OWL_DMADESC_CTRLA] = llc_hw_ctrla(mode,
                                                  OWL_DMA_LLC_SAV_LOAD_NEXT |
                                                  OWL_DMA_LLC_DAV_LOAD_NEXT);

        if (is_cyclic)
                ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK);
        else
                ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);

        lli->hw[OWL_DMADESC_NEXT_LLI] = 0; /* One link list by default */
        lli->hw[OWL_DMADESC_SADDR] = src;
        lli->hw[OWL_DMADESC_DADDR] = dst;
        lli->hw[OWL_DMADESC_SRC_STRIDE] = 0;
        lli->hw[OWL_DMADESC_DST_STRIDE] = 0;

        if (od->devid == S700_DMA) {
                /* Max frame length is 1MB */
                lli->hw[OWL_DMADESC_FLEN] = len;
                /*
                 * On S700, the word at offset 0x1C is shared between the
                 * frame count and ctrlb: the first 12 bits hold the frame
                 * count and the remaining 20 bits hold ctrlb.
                 */
                lli->hw[OWL_DMADESC_CTRLB] = FCNT_VAL | ctrlb;
        } else {
                /*
                 * On S900, the word at offset 0xC is shared between the
                 * frame length (max frame length is 1MB) and the frame
                 * count: the first 20 bits hold the frame length and the
                 * remaining 12 bits hold the frame count.
                 */
                lli->hw[OWL_DMADESC_FLEN] = len | FCNT_VAL << 20;
                lli->hw[OWL_DMADESC_CTRLB] = ctrlb;
        }

        return 0;
}

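/*
 * Claim the first free physical channel for 'vchan'. od->lock serializes
 * the claim against owl_dma_terminate_pchan(), which releases channels.
 */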
static struct owl_dma_pchan *owl_dma_get_pchan(struct owl_dma *od,
                                               struct owl_dma_vchan *vchan)
{
        struct owl_dma_pchan *pchan = NULL;
        unsigned long flags;
        int i;

        for (i = 0; i < od->nr_pchans; i++) {
                pchan = &od->pchans[i];

                spin_lock_irqsave(&od->lock, flags);
                if (!pchan->vchan) {
                        pchan->vchan = vchan;
                        spin_unlock_irqrestore(&od->lock, flags);
                        break;
                }

                spin_unlock_irqrestore(&od->lock, flags);
        }

        return pchan;
}

static int owl_dma_pchan_busy(struct owl_dma *od, struct owl_dma_pchan *pchan)
{
        unsigned int val;

        val = dma_readl(od, OWL_DMA_IDLE_STAT);

        return !(val & (1 << pchan->id));
}

static void owl_dma_terminate_pchan(struct owl_dma *od,
                                    struct owl_dma_pchan *pchan)
{
        unsigned long flags;
        u32 irq_pd;

        pchan_writel(pchan, OWL_DMAX_START, 0);
        pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);

        spin_lock_irqsave(&od->lock, flags);
        dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), false);

        irq_pd = dma_readl(od, OWL_DMA_IRQ_PD0);
        if (irq_pd & (1 << pchan->id)) {
                dev_warn(od->dma.dev,
                         "terminating pchan %d that still has pending irq\n",
                         pchan->id);
                dma_writel(od, OWL_DMA_IRQ_PD0, (1 << pchan->id));
        }

        pchan->vchan = NULL;

        spin_unlock_irqrestore(&od->lock, flags);
}

static void owl_dma_pause_pchan(struct owl_dma_pchan *pchan)
{
        /* pchan_writel() takes (pchan, reg, data): write 1 to PAUSE */
        pchan_writel(pchan, OWL_DMAX_PAUSE, 0x1);
}

static void owl_dma_resume_pchan(struct owl_dma_pchan *pchan)
{
        pchan_writel(pchan, OWL_DMAX_PAUSE, 0x0);
}

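/*
 * Take the next queued descriptor off the vchan and program it onto the
 * physical channel in link-list mode. Called with vchan->vc.lock held,
 * from owl_dma_phy_alloc_and_start() and from the interrupt handler.
 */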
static int owl_dma_start_next_txd(struct owl_dma_vchan *vchan)
{
        struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
        struct virt_dma_desc *vd = vchan_next_desc(&vchan->vc);
        struct owl_dma_pchan *pchan = vchan->pchan;
        struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
        struct owl_dma_lli *lli;
        unsigned long flags;
        u32 int_ctl;

        list_del(&vd->node);

        vchan->txd = txd;

        /* Wait for channel inactive */
        while (owl_dma_pchan_busy(od, pchan))
                cpu_relax();

        lli = list_first_entry(&txd->lli_list,
                               struct owl_dma_lli, node);

        if (txd->cyclic)
                int_ctl = OWL_DMA_INTCTL_BLOCK;
        else
                int_ctl = OWL_DMA_INTCTL_SUPER_BLOCK;

        pchan_writel(pchan, OWL_DMAX_MODE, OWL_DMA_MODE_LME);
        pchan_writel(pchan, OWL_DMAX_LINKLIST_CTL,
                     OWL_DMA_LLC_SAV_LOAD_NEXT | OWL_DMA_LLC_DAV_LOAD_NEXT);
        pchan_writel(pchan, OWL_DMAX_NEXT_DESCRIPTOR, lli->phys);
        pchan_writel(pchan, OWL_DMAX_INT_CTL, int_ctl);

        /* Clear IRQ status for this pchan */
        pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);

        spin_lock_irqsave(&od->lock, flags);

        dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), true);

        spin_unlock_irqrestore(&od->lock, flags);

        dev_dbg(chan2dev(&vchan->vc.chan), "starting pchan %d\n", pchan->id);

        /* Start DMA transfer for this pchan */
        pchan_writel(pchan, OWL_DMAX_START, 0x1);

        return 0;
}

static void owl_dma_phy_free(struct owl_dma *od, struct owl_dma_vchan *vchan)
{
        /* Ensure that the physical channel is stopped */
        owl_dma_terminate_pchan(od, vchan->pchan);

        vchan->pchan = NULL;
}

static irqreturn_t owl_dma_interrupt(int irq, void *dev_id)
{
        struct owl_dma *od = dev_id;
        struct owl_dma_vchan *vchan;
        struct owl_dma_pchan *pchan;
        unsigned long pending;
        int i;
        unsigned int global_irq_pending, chan_irq_pending;

        spin_lock(&od->lock);

        pending = dma_readl(od, OWL_DMA_IRQ_PD0);

        /* Clear IRQ status for each pchan */
        for_each_set_bit(i, &pending, od->nr_pchans) {
                pchan = &od->pchans[i];
                pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
        }

        /* Clear pending IRQ */
        dma_writel(od, OWL_DMA_IRQ_PD0, pending);

        /* Check for missed pending IRQs */
        for (i = 0; i < od->nr_pchans; i++) {
                pchan = &od->pchans[i];
                chan_irq_pending = pchan_readl(pchan, OWL_DMAX_INT_CTL) &
                                   pchan_readl(pchan, OWL_DMAX_INT_STATUS);

                /* Dummy read to ensure OWL_DMA_IRQ_PD0 value is updated */
                dma_readl(od, OWL_DMA_IRQ_PD0);

                global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0);

                if (chan_irq_pending && !(global_irq_pending & BIT(i))) {
                        dev_dbg(od->dma.dev,
                                "global and channel IRQ pending match err\n");

                        /* Clear IRQ status for this pchan */
                        pchan_update(pchan, OWL_DMAX_INT_STATUS,
                                     0xff, false);

                        /* Update global IRQ pending */
                        pending |= BIT(i);
                }
        }

        spin_unlock(&od->lock);

        for_each_set_bit(i, &pending, od->nr_pchans) {
                struct owl_dma_txd *txd;

                pchan = &od->pchans[i];

                vchan = pchan->vchan;
                if (!vchan) {
                        dev_warn(od->dma.dev, "no vchan attached on pchan %d\n",
                                 pchan->id);
                        continue;
                }

                spin_lock(&vchan->vc.lock);

                txd = vchan->txd;
                if (txd) {
                        vchan->txd = NULL;

                        vchan_cookie_complete(&txd->vd);

                        /*
                         * Start the next descriptor (if any),
                         * otherwise free this channel.
                         */
                        if (vchan_next_desc(&vchan->vc))
                                owl_dma_start_next_txd(vchan);
                        else
                                owl_dma_phy_free(od, vchan);
                }

                spin_unlock(&vchan->vc.lock);
        }

        return IRQ_HANDLED;
}

static void owl_dma_free_txd(struct owl_dma *od, struct owl_dma_txd *txd)
{
        struct owl_dma_lli *lli, *_lli;

        if (unlikely(!txd))
                return;

        list_for_each_entry_safe(lli, _lli, &txd->lli_list, node)
                owl_dma_free_lli(od, lli);

        kfree(txd);
}

static void owl_dma_desc_free(struct virt_dma_desc *vd)
{
        struct owl_dma *od = to_owl_dma(vd->tx.chan->device);
        struct owl_dma_txd *txd = to_owl_txd(&vd->tx);

        owl_dma_free_txd(od, txd);
}

static int owl_dma_terminate_all(struct dma_chan *chan)
{
        struct owl_dma *od = to_owl_dma(chan->device);
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        unsigned long flags;
        LIST_HEAD(head);

        spin_lock_irqsave(&vchan->vc.lock, flags);

        if (vchan->pchan)
                owl_dma_phy_free(od, vchan);

        if (vchan->txd) {
                owl_dma_desc_free(&vchan->txd->vd);
                vchan->txd = NULL;
        }

        vchan_get_all_descriptors(&vchan->vc, &head);

        spin_unlock_irqrestore(&vchan->vc.lock, flags);

        vchan_dma_desc_free_list(&vchan->vc, &head);

        return 0;
}

static int owl_dma_config(struct dma_chan *chan,
                          struct dma_slave_config *config)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);

        /* Reject definitely invalid configurations */
        if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
            config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
                return -EINVAL;

        memcpy(&vchan->cfg, config, sizeof(struct dma_slave_config));

        return 0;
}

static int owl_dma_pause(struct dma_chan *chan)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        unsigned long flags;

        spin_lock_irqsave(&vchan->vc.lock, flags);

        owl_dma_pause_pchan(vchan->pchan);

        spin_unlock_irqrestore(&vchan->vc.lock, flags);

        return 0;
}

static int owl_dma_resume(struct dma_chan *chan)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        unsigned long flags;

        if (!vchan->pchan && !vchan->txd)
                return 0;

        dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc);

        spin_lock_irqsave(&vchan->vc.lock, flags);

        owl_dma_resume_pchan(vchan->pchan);

        spin_unlock_irqrestore(&vchan->vc.lock, flags);

        return 0;
}

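/*
 * Residue of the transfer currently running on a pchan: the remaining
 * byte count of the frame in flight, plus, in link-list mode, the frame
 * length of every node from the one the hardware will load next
 * (OWL_DMAX_NEXT_DESCRIPTOR) to the end of the list.
 */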
static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan)
{
        struct owl_dma_pchan *pchan;
        struct owl_dma_txd *txd;
        struct owl_dma_lli *lli;
        unsigned int next_lli_phy;
        size_t bytes;

        pchan = vchan->pchan;
        txd = vchan->txd;

        if (!pchan || !txd)
                return 0;

        /* Get the remaining byte count of the current link-list node */
        bytes = pchan_readl(pchan, OWL_DMAX_REMAIN_CNT);

        /* Loop through the remaining nodes to get the total remaining bytes */
        if (pchan_readl(pchan, OWL_DMAX_MODE) & OWL_DMA_MODE_LME) {
                next_lli_phy = pchan_readl(pchan, OWL_DMAX_NEXT_DESCRIPTOR);
                list_for_each_entry(lli, &txd->lli_list, node) {
                        /* Start from the next active node */
                        if (lli->phys == next_lli_phy) {
                                list_for_each_entry_from(lli, &txd->lli_list, node)
                                        bytes += llc_hw_flen(lli);
                                break;
                        }
                }
        }

        return bytes;
}

static enum dma_status owl_dma_tx_status(struct dma_chan *chan,
                                         dma_cookie_t cookie,
                                         struct dma_tx_state *state)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        struct owl_dma_lli *lli;
        struct virt_dma_desc *vd;
        struct owl_dma_txd *txd;
        enum dma_status ret;
        unsigned long flags;
        size_t bytes = 0;

        ret = dma_cookie_status(chan, cookie, state);
        if (ret == DMA_COMPLETE || !state)
                return ret;

        spin_lock_irqsave(&vchan->vc.lock, flags);

        vd = vchan_find_desc(&vchan->vc, cookie);
        if (vd) {
                txd = to_owl_txd(&vd->tx);
                list_for_each_entry(lli, &txd->lli_list, node)
                        bytes += llc_hw_flen(lli);
        } else {
                bytes = owl_dma_getbytes_chan(vchan);
        }

        spin_unlock_irqrestore(&vchan->vc.lock, flags);

        dma_set_residue(state, bytes);

        return ret;
}

static void owl_dma_phy_alloc_and_start(struct owl_dma_vchan *vchan)
{
        struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
        struct owl_dma_pchan *pchan;

        pchan = owl_dma_get_pchan(od, vchan);
        if (!pchan)
                return;

        dev_dbg(od->dma.dev, "allocated pchan %d\n", pchan->id);

        vchan->pchan = pchan;
        owl_dma_start_next_txd(vchan);
}

static void owl_dma_issue_pending(struct dma_chan *chan)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        unsigned long flags;

        spin_lock_irqsave(&vchan->vc.lock, flags);
        if (vchan_issue_pending(&vchan->vc)) {
                if (!vchan->pchan)
                        owl_dma_phy_alloc_and_start(vchan);
        }
        spin_unlock_irqrestore(&vchan->vc.lock, flags);
}

static struct dma_async_tx_descriptor
                *owl_dma_prep_memcpy(struct dma_chan *chan,
                                     dma_addr_t dst, dma_addr_t src,
                                     size_t len, unsigned long flags)
{
        struct owl_dma *od = to_owl_dma(chan->device);
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        struct owl_dma_txd *txd;
        struct owl_dma_lli *lli, *prev = NULL;
        size_t offset, bytes;
        int ret;

        if (!len)
                return NULL;

        txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
        if (!txd)
                return NULL;

        INIT_LIST_HEAD(&txd->lli_list);

        /* Process the transfer frame by frame */
        for (offset = 0; offset < len; offset += bytes) {
                lli = owl_dma_alloc_lli(od);
                if (!lli) {
                        dev_warn(chan2dev(chan), "failed to allocate lli\n");
                        goto err_txd_free;
                }

                bytes = min_t(size_t, (len - offset), OWL_DMA_FRAME_MAX_LENGTH);

                ret = owl_dma_cfg_lli(vchan, lli, src + offset, dst + offset,
                                      bytes, DMA_MEM_TO_MEM,
                                      &vchan->cfg, txd->cyclic);
                if (ret) {
                        dev_warn(chan2dev(chan), "failed to config lli\n");
                        goto err_txd_free;
                }

                prev = owl_dma_add_lli(txd, prev, lli, false);
        }

        return vchan_tx_prep(&vchan->vc, &txd->vd, flags);

err_txd_free:
        owl_dma_free_txd(od, txd);
        return NULL;
}

static struct dma_async_tx_descriptor
                *owl_dma_prep_slave_sg(struct dma_chan *chan,
                                       struct scatterlist *sgl,
                                       unsigned int sg_len,
                                       enum dma_transfer_direction dir,
                                       unsigned long flags, void *context)
{
        struct owl_dma *od = to_owl_dma(chan->device);
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        struct dma_slave_config *sconfig = &vchan->cfg;
        struct owl_dma_txd *txd;
        struct owl_dma_lli *lli, *prev = NULL;
        struct scatterlist *sg;
        dma_addr_t addr, src = 0, dst = 0;
        size_t len;
        int ret, i;

        txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
        if (!txd)
                return NULL;

        INIT_LIST_HEAD(&txd->lli_list);

        for_each_sg(sgl, sg, sg_len, i) {
                addr = sg_dma_address(sg);
                len = sg_dma_len(sg);

                if (len > OWL_DMA_FRAME_MAX_LENGTH) {
                        dev_err(od->dma.dev,
                                "frame length exceeds max supported length\n");
                        goto err_txd_free;
                }

                lli = owl_dma_alloc_lli(od);
                if (!lli) {
                        dev_err(chan2dev(chan), "failed to allocate lli\n");
                        goto err_txd_free;
                }

                if (dir == DMA_MEM_TO_DEV) {
                        src = addr;
                        dst = sconfig->dst_addr;
                } else {
                        src = sconfig->src_addr;
                        dst = addr;
                }

                ret = owl_dma_cfg_lli(vchan, lli, src, dst, len, dir, sconfig,
                                      txd->cyclic);
                if (ret) {
                        dev_warn(chan2dev(chan), "failed to config lli\n");
                        goto err_txd_free;
                }

                prev = owl_dma_add_lli(txd, prev, lli, false);
        }

        return vchan_tx_prep(&vchan->vc, &txd->vd, flags);

err_txd_free:
        owl_dma_free_txd(od, txd);

        return NULL;
}

static struct dma_async_tx_descriptor
                *owl_prep_dma_cyclic(struct dma_chan *chan,
                                     dma_addr_t buf_addr, size_t buf_len,
                                     size_t period_len,
                                     enum dma_transfer_direction dir,
                                     unsigned long flags)
{
        struct owl_dma *od = to_owl_dma(chan->device);
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);
        struct dma_slave_config *sconfig = &vchan->cfg;
        struct owl_dma_txd *txd;
        struct owl_dma_lli *lli, *prev = NULL, *first = NULL;
        dma_addr_t src = 0, dst = 0;
        unsigned int periods = buf_len / period_len;
        int ret, i;

        txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
        if (!txd)
                return NULL;

        INIT_LIST_HEAD(&txd->lli_list);
        txd->cyclic = true;

        for (i = 0; i < periods; i++) {
                lli = owl_dma_alloc_lli(od);
                if (!lli) {
                        dev_warn(chan2dev(chan), "failed to allocate lli\n");
                        goto err_txd_free;
                }

                if (dir == DMA_MEM_TO_DEV) {
                        src = buf_addr + (period_len * i);
                        dst = sconfig->dst_addr;
                } else if (dir == DMA_DEV_TO_MEM) {
                        src = sconfig->src_addr;
                        dst = buf_addr + (period_len * i);
                }

                ret = owl_dma_cfg_lli(vchan, lli, src, dst, period_len,
                                      dir, sconfig, txd->cyclic);
                if (ret) {
                        dev_warn(chan2dev(chan), "failed to config lli\n");
                        goto err_txd_free;
                }

                if (!first)
                        first = lli;

                prev = owl_dma_add_lli(txd, prev, lli, false);
        }

        /* close the cyclic list */
        owl_dma_add_lli(txd, prev, first, true);

        return vchan_tx_prep(&vchan->vc, &txd->vd, flags);

err_txd_free:
        owl_dma_free_txd(od, txd);

        return NULL;
}

static void owl_dma_free_chan_resources(struct dma_chan *chan)
{
        struct owl_dma_vchan *vchan = to_owl_vchan(chan);

        /* Ensure all queued descriptors are freed */
        vchan_free_chan_resources(&vchan->vc);
}

static inline void owl_dma_free(struct owl_dma *od)
{
        struct owl_dma_vchan *vchan = NULL;
        struct owl_dma_vchan *next;

        list_for_each_entry_safe(vchan,
                                 next, &od->dma.channels, vc.chan.device_node) {
                list_del(&vchan->vc.chan.device_node);
                tasklet_kill(&vchan->vc.task);
        }
}

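/*
 * The single translation cell carries the DRQ (DMA request line) of the
 * requesting peripheral. Any free virtual channel can service any request
 * line, so just grab one and record the drq in it.
 */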
static struct dma_chan *owl_dma_of_xlate(struct of_phandle_args *dma_spec,
                                         struct of_dma *ofdma)
{
        struct owl_dma *od = ofdma->of_dma_data;
        struct owl_dma_vchan *vchan;
        struct dma_chan *chan;
        u8 drq = dma_spec->args[0];

        if (drq > od->nr_vchans)
                return NULL;

        chan = dma_get_any_slave_channel(&od->dma);
        if (!chan)
                return NULL;

        vchan = to_owl_vchan(chan);
        vchan->drq = drq;

        return chan;
}

static const struct of_device_id owl_dma_match[] = {
        { .compatible = "actions,s500-dma", .data = (void *)S900_DMA,},
        { .compatible = "actions,s700-dma", .data = (void *)S700_DMA,},
        { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,},
        { /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, owl_dma_match);
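
/*
 * Illustrative (not authoritative) device-tree node for this controller;
 * the address, interrupt and clock values are SoC-specific and are shown
 * here only as an example:
 *
 *      dma: dma-controller@e0260000 {
 *              compatible = "actions,s900-dma";
 *              reg = <0xe0260000 0x1000>;
 *              interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
 *              dma-channels = <12>;
 *              dma-requests = <46>;
 *              clocks = <&clock CLK_DMAC>;
 *              #dma-cells = <1>;
 *      };
 */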

static int owl_dma_probe(struct platform_device *pdev)
{
        struct device_node *np = pdev->dev.of_node;
        struct owl_dma *od;
        int ret, i, nr_channels, nr_requests;

        od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
        if (!od)
                return -ENOMEM;

        od->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(od->base))
                return PTR_ERR(od->base);

        ret = of_property_read_u32(np, "dma-channels", &nr_channels);
        if (ret) {
                dev_err(&pdev->dev, "can't get dma-channels\n");
                return ret;
        }

        ret = of_property_read_u32(np, "dma-requests", &nr_requests);
        if (ret) {
                dev_err(&pdev->dev, "can't get dma-requests\n");
                return ret;
        }

        dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n",
                 nr_channels, nr_requests);

        od->devid = (uintptr_t)of_device_get_match_data(&pdev->dev);

        od->nr_pchans = nr_channels;
        od->nr_vchans = nr_requests;

        pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);

        platform_set_drvdata(pdev, od);
        spin_lock_init(&od->lock);

        dma_cap_set(DMA_MEMCPY, od->dma.cap_mask);
        dma_cap_set(DMA_SLAVE, od->dma.cap_mask);
        dma_cap_set(DMA_CYCLIC, od->dma.cap_mask);

        od->dma.dev = &pdev->dev;
        od->dma.device_free_chan_resources = owl_dma_free_chan_resources;
        od->dma.device_tx_status = owl_dma_tx_status;
        od->dma.device_issue_pending = owl_dma_issue_pending;
        od->dma.device_prep_dma_memcpy = owl_dma_prep_memcpy;
        od->dma.device_prep_slave_sg = owl_dma_prep_slave_sg;
        od->dma.device_prep_dma_cyclic = owl_prep_dma_cyclic;
        od->dma.device_config = owl_dma_config;
        od->dma.device_pause = owl_dma_pause;
        od->dma.device_resume = owl_dma_resume;
        od->dma.device_terminate_all = owl_dma_terminate_all;
        od->dma.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
        od->dma.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
        od->dma.directions = BIT(DMA_MEM_TO_MEM);
        od->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;

        INIT_LIST_HEAD(&od->dma.channels);

        od->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(od->clk)) {
                dev_err(&pdev->dev, "unable to get clock\n");
                return PTR_ERR(od->clk);
        }

        /*
         * Even though the DMA controller is capable of generating 4
         * IRQs for the DMA priority feature, we only use 1 IRQ for
         * simplicity.
         */
        od->irq = platform_get_irq(pdev, 0);
        if (od->irq < 0)
                return od->irq;

        ret = devm_request_irq(&pdev->dev, od->irq, owl_dma_interrupt, 0,
                               dev_name(&pdev->dev), od);
        if (ret) {
                dev_err(&pdev->dev, "unable to request IRQ\n");
                return ret;
        }

        /* Init physical channel */
        od->pchans = devm_kcalloc(&pdev->dev, od->nr_pchans,
                                  sizeof(struct owl_dma_pchan), GFP_KERNEL);
        if (!od->pchans)
                return -ENOMEM;

        for (i = 0; i < od->nr_pchans; i++) {
                struct owl_dma_pchan *pchan = &od->pchans[i];

                pchan->id = i;
                pchan->base = od->base + OWL_DMA_CHAN_BASE(i);
        }

        /* Init virtual channel */
        od->vchans = devm_kcalloc(&pdev->dev, od->nr_vchans,
                                  sizeof(struct owl_dma_vchan), GFP_KERNEL);
        if (!od->vchans)
                return -ENOMEM;

        for (i = 0; i < od->nr_vchans; i++) {
                struct owl_dma_vchan *vchan = &od->vchans[i];

                vchan->vc.desc_free = owl_dma_desc_free;
                vchan_init(&vchan->vc, &od->dma);
        }

        /* Create a pool of consistent memory blocks for hardware descriptors */
        od->lli_pool = dma_pool_create(dev_name(od->dma.dev), od->dma.dev,
                                       sizeof(struct owl_dma_lli),
                                       __alignof__(struct owl_dma_lli),
                                       0);
        if (!od->lli_pool) {
                dev_err(&pdev->dev, "unable to allocate DMA descriptor pool\n");
                return -ENOMEM;
        }

        ret = clk_prepare_enable(od->clk);
        if (ret) {
                dma_pool_destroy(od->lli_pool);
                return ret;
        }

        ret = dma_async_device_register(&od->dma);
        if (ret) {
                dev_err(&pdev->dev, "failed to register DMA engine device\n");
                goto err_pool_free;
        }

        /* Device-tree DMA controller registration */
        ret = of_dma_controller_register(pdev->dev.of_node,
                                         owl_dma_of_xlate, od);
        if (ret) {
                dev_err(&pdev->dev, "of_dma_controller_register failed\n");
                goto err_dma_unregister;
        }

        return 0;

err_dma_unregister:
        dma_async_device_unregister(&od->dma);
err_pool_free:
        clk_disable_unprepare(od->clk);
        dma_pool_destroy(od->lli_pool);

        return ret;
}

static void owl_dma_remove(struct platform_device *pdev)
{
        struct owl_dma *od = platform_get_drvdata(pdev);

        of_dma_controller_free(pdev->dev.of_node);
        dma_async_device_unregister(&od->dma);

        /* Mask all interrupts for this execution environment */
        dma_writel(od, OWL_DMA_IRQ_EN0, 0x0);

        /* Make sure we won't have any further interrupts */
        devm_free_irq(od->dma.dev, od->irq, od);

        owl_dma_free(od);

        clk_disable_unprepare(od->clk);
        dma_pool_destroy(od->lli_pool);
}

static struct platform_driver owl_dma_driver = {
        .probe  = owl_dma_probe,
        .remove = owl_dma_remove,
        .driver = {
                .name = "dma-owl",
                .of_match_table = of_match_ptr(owl_dma_match),
        },
};

static int owl_dma_init(void)
{
        return platform_driver_register(&owl_dma_driver);
}
subsys_initcall(owl_dma_init);

static void __exit owl_dma_exit(void)
{
        platform_driver_unregister(&owl_dma_driver);
}
module_exit(owl_dma_exit);

MODULE_AUTHOR("David Liu <liuwei@actions-semi.com>");
MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
MODULE_DESCRIPTION("Actions Semi Owl SoCs DMA driver");
MODULE_LICENSE("GPL");