drivers/dma/at_xdmac.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
 *
 * Copyright (C) 2014 Atmel Corporation
 *
 * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
 */

#include <asm/barrier.h>
#include <dt-bindings/dma/at91.h>
#include <linux/clk.h>
#include <linux/dmaengine.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_dma.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>

#include "dmaengine.h"

/* Global registers */
#define AT_XDMAC_GTYPE          0x00    /* Global Type Register */
#define         AT_XDMAC_NB_CH(i)       (((i) & 0x1F) + 1)              /* Number of Channels (field stores N - 1) */
#define         AT_XDMAC_FIFO_SZ(i)     (((i) >> 5) & 0x7FF)            /* FIFO Size in Bytes */
#define         AT_XDMAC_NB_REQ(i)      ((((i) >> 16) & 0x3F) + 1)      /* Number of Peripheral Requests (field stores N - 1) */
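
/*
 * Example (illustrative only): the probe path reads AT_XDMAC_GTYPE once and
 * derives the controller geometry from it with the helpers above:
 *
 *      u32 reg = at_xdmac_read(atxdmac, AT_XDMAC_GTYPE);
 *      unsigned int nr_channels = AT_XDMAC_NB_CH(reg);
 *      unsigned int fifo_size = AT_XDMAC_FIFO_SZ(reg);
 *      unsigned int nr_requests = AT_XDMAC_NB_REQ(reg);
 *
 * (at_xdmac_read() is defined further down in this file.)
 */
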
#define AT_XDMAC_GCFG           0x04    /* Global Configuration Register */
#define         AT_XDMAC_WRHP(i)                (((i) & 0xF) << 4)
#define         AT_XDMAC_WRMP(i)                (((i) & 0xF) << 8)
#define         AT_XDMAC_WRLP(i)                (((i) & 0xF) << 12)
#define         AT_XDMAC_RDHP(i)                (((i) & 0xF) << 16)
#define         AT_XDMAC_RDMP(i)                (((i) & 0xF) << 20)
#define         AT_XDMAC_RDLP(i)                (((i) & 0xF) << 24)
#define         AT_XDMAC_RDSG(i)                (((i) & 0xF) << 28)
#define AT_XDMAC_GCFG_M2M       (AT_XDMAC_RDLP(0xF) | AT_XDMAC_WRLP(0xF))
#define AT_XDMAC_GCFG_P2M       (AT_XDMAC_RDSG(0x1) | AT_XDMAC_RDHP(0x3) | \
                                AT_XDMAC_WRHP(0x5))
#define AT_XDMAC_GWAC           0x08    /* Global Weighted Arbiter Configuration Register */
#define         AT_XDMAC_PW0(i)         (((i) & 0xF) << 0)
#define         AT_XDMAC_PW1(i)         (((i) & 0xF) << 4)
#define         AT_XDMAC_PW2(i)         (((i) & 0xF) << 8)
#define         AT_XDMAC_PW3(i)         (((i) & 0xF) << 12)
#define AT_XDMAC_GWAC_M2M       0
#define AT_XDMAC_GWAC_P2M       (AT_XDMAC_PW0(0xF) | AT_XDMAC_PW2(0xF))

#define AT_XDMAC_GIE            0x0C    /* Global Interrupt Enable Register */
#define AT_XDMAC_GID            0x10    /* Global Interrupt Disable Register */
#define AT_XDMAC_GIM            0x14    /* Global Interrupt Mask Register */
#define AT_XDMAC_GIS            0x18    /* Global Interrupt Status Register */
#define AT_XDMAC_GE             0x1C    /* Global Channel Enable Register */
#define AT_XDMAC_GD             0x20    /* Global Channel Disable Register */
#define AT_XDMAC_GS             0x24    /* Global Channel Status Register */
#define AT_XDMAC_VERSION        0xFFC   /* XDMAC Version Register */

/* Channel relative registers offsets */
#define AT_XDMAC_CIE            0x00    /* Channel Interrupt Enable Register */
#define         AT_XDMAC_CIE_BIE        BIT(0)  /* End of Block Interrupt Enable Bit */
#define         AT_XDMAC_CIE_LIE        BIT(1)  /* End of Linked List Interrupt Enable Bit */
#define         AT_XDMAC_CIE_DIE        BIT(2)  /* End of Disable Interrupt Enable Bit */
#define         AT_XDMAC_CIE_FIE        BIT(3)  /* End of Flush Interrupt Enable Bit */
#define         AT_XDMAC_CIE_RBEIE      BIT(4)  /* Read Bus Error Interrupt Enable Bit */
#define         AT_XDMAC_CIE_WBEIE      BIT(5)  /* Write Bus Error Interrupt Enable Bit */
#define         AT_XDMAC_CIE_ROIE       BIT(6)  /* Request Overflow Interrupt Enable Bit */
#define AT_XDMAC_CID            0x04    /* Channel Interrupt Disable Register */
#define         AT_XDMAC_CID_BID        BIT(0)  /* End of Block Interrupt Disable Bit */
#define         AT_XDMAC_CID_LID        BIT(1)  /* End of Linked List Interrupt Disable Bit */
#define         AT_XDMAC_CID_DID        BIT(2)  /* End of Disable Interrupt Disable Bit */
#define         AT_XDMAC_CID_FID        BIT(3)  /* End of Flush Interrupt Disable Bit */
#define         AT_XDMAC_CID_RBEID      BIT(4)  /* Read Bus Error Interrupt Disable Bit */
#define         AT_XDMAC_CID_WBEID      BIT(5)  /* Write Bus Error Interrupt Disable Bit */
#define         AT_XDMAC_CID_ROID       BIT(6)  /* Request Overflow Interrupt Disable Bit */
#define AT_XDMAC_CIM            0x08    /* Channel Interrupt Mask Register */
#define         AT_XDMAC_CIM_BIM        BIT(0)  /* End of Block Interrupt Mask Bit */
#define         AT_XDMAC_CIM_LIM        BIT(1)  /* End of Linked List Interrupt Mask Bit */
#define         AT_XDMAC_CIM_DIM        BIT(2)  /* End of Disable Interrupt Mask Bit */
#define         AT_XDMAC_CIM_FIM        BIT(3)  /* End of Flush Interrupt Mask Bit */
#define         AT_XDMAC_CIM_RBEIM      BIT(4)  /* Read Bus Error Interrupt Mask Bit */
#define         AT_XDMAC_CIM_WBEIM      BIT(5)  /* Write Bus Error Interrupt Mask Bit */
#define         AT_XDMAC_CIM_ROIM       BIT(6)  /* Request Overflow Interrupt Mask Bit */
#define AT_XDMAC_CIS            0x0C    /* Channel Interrupt Status Register */
#define         AT_XDMAC_CIS_BIS        BIT(0)  /* End of Block Interrupt Status Bit */
#define         AT_XDMAC_CIS_LIS        BIT(1)  /* End of Linked List Interrupt Status Bit */
#define         AT_XDMAC_CIS_DIS        BIT(2)  /* End of Disable Interrupt Status Bit */
#define         AT_XDMAC_CIS_FIS        BIT(3)  /* End of Flush Interrupt Status Bit */
#define         AT_XDMAC_CIS_RBEIS      BIT(4)  /* Read Bus Error Interrupt Status Bit */
#define         AT_XDMAC_CIS_WBEIS      BIT(5)  /* Write Bus Error Interrupt Status Bit */
#define         AT_XDMAC_CIS_ROIS       BIT(6)  /* Request Overflow Interrupt Status Bit */
#define AT_XDMAC_CSA            0x10    /* Channel Source Address Register */
#define AT_XDMAC_CDA            0x14    /* Channel Destination Address Register */
#define AT_XDMAC_CNDA           0x18    /* Channel Next Descriptor Address Register */
#define         AT_XDMAC_CNDA_NDAIF(i)  ((i) & 0x1)                     /* Channel x Next Descriptor Interface */
#define         AT_XDMAC_CNDA_NDA(i)    ((i) & 0xfffffffc)              /* Channel x Next Descriptor Address */
#define AT_XDMAC_CNDC           0x1C    /* Channel Next Descriptor Control Register */
#define         AT_XDMAC_CNDC_NDE               (0x1 << 0)              /* Channel x Next Descriptor Enable */
#define         AT_XDMAC_CNDC_NDSUP             (0x1 << 1)              /* Channel x Next Descriptor Source Update */
#define         AT_XDMAC_CNDC_NDDUP             (0x1 << 2)              /* Channel x Next Descriptor Destination Update */
#define         AT_XDMAC_CNDC_NDVIEW_MASK       GENMASK(28, 27)
#define         AT_XDMAC_CNDC_NDVIEW_NDV0       (0x0 << 3)              /* Channel x Next Descriptor View 0 */
#define         AT_XDMAC_CNDC_NDVIEW_NDV1       (0x1 << 3)              /* Channel x Next Descriptor View 1 */
#define         AT_XDMAC_CNDC_NDVIEW_NDV2       (0x2 << 3)              /* Channel x Next Descriptor View 2 */
#define         AT_XDMAC_CNDC_NDVIEW_NDV3       (0x3 << 3)              /* Channel x Next Descriptor View 3 */
#define AT_XDMAC_CUBC           0x20    /* Channel Microblock Control Register */
#define AT_XDMAC_CBC            0x24    /* Channel Block Control Register */
#define AT_XDMAC_CC             0x28    /* Channel Configuration Register */
#define         AT_XDMAC_CC_TYPE        (0x1 << 0)      /* Channel Transfer Type */
#define                 AT_XDMAC_CC_TYPE_MEM_TRAN       (0x0 << 0)      /* Memory to Memory Transfer */
#define                 AT_XDMAC_CC_TYPE_PER_TRAN       (0x1 << 0)      /* Peripheral to Memory or Memory to Peripheral Transfer */
#define         AT_XDMAC_CC_MBSIZE_MASK (0x3 << 1)
#define                 AT_XDMAC_CC_MBSIZE_SINGLE       (0x0 << 1)
#define                 AT_XDMAC_CC_MBSIZE_FOUR         (0x1 << 1)
#define                 AT_XDMAC_CC_MBSIZE_EIGHT        (0x2 << 1)
#define                 AT_XDMAC_CC_MBSIZE_SIXTEEN      (0x3 << 1)
#define         AT_XDMAC_CC_DSYNC       (0x1 << 4)      /* Channel Synchronization */
#define                 AT_XDMAC_CC_DSYNC_PER2MEM       (0x0 << 4)
#define                 AT_XDMAC_CC_DSYNC_MEM2PER       (0x1 << 4)
#define         AT_XDMAC_CC_PROT        (0x1 << 5)      /* Channel Protection */
#define                 AT_XDMAC_CC_PROT_SEC            (0x0 << 5)
#define                 AT_XDMAC_CC_PROT_UNSEC          (0x1 << 5)
#define         AT_XDMAC_CC_SWREQ       (0x1 << 6)      /* Channel Software Request Trigger */
#define                 AT_XDMAC_CC_SWREQ_HWR_CONNECTED (0x0 << 6)
#define                 AT_XDMAC_CC_SWREQ_SWR_CONNECTED (0x1 << 6)
#define         AT_XDMAC_CC_MEMSET      (0x1 << 7)      /* Channel Fill Block of memory */
#define                 AT_XDMAC_CC_MEMSET_NORMAL_MODE  (0x0 << 7)
#define                 AT_XDMAC_CC_MEMSET_HW_MODE      (0x1 << 7)
#define         AT_XDMAC_CC_CSIZE(i)    ((0x7 & (i)) << 8)      /* Channel Chunk Size */
#define         AT_XDMAC_CC_DWIDTH_OFFSET       11
#define         AT_XDMAC_CC_DWIDTH_MASK (0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
#define         AT_XDMAC_CC_DWIDTH(i)   ((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET)      /* Channel Data Width */
#define                 AT_XDMAC_CC_DWIDTH_BYTE         0x0
#define                 AT_XDMAC_CC_DWIDTH_HALFWORD     0x1
#define                 AT_XDMAC_CC_DWIDTH_WORD         0x2
#define                 AT_XDMAC_CC_DWIDTH_DWORD        0x3
#define         AT_XDMAC_CC_SIF(i)      ((0x1 & (i)) << 13)     /* Channel Source Interface Identifier */
#define         AT_XDMAC_CC_DIF(i)      ((0x1 & (i)) << 14)     /* Channel Destination Interface Identifier */
#define         AT_XDMAC_CC_SAM_MASK    (0x3 << 16)     /* Channel Source Addressing Mode */
#define                 AT_XDMAC_CC_SAM_FIXED_AM        (0x0 << 16)
#define                 AT_XDMAC_CC_SAM_INCREMENTED_AM  (0x1 << 16)
#define                 AT_XDMAC_CC_SAM_UBS_AM          (0x2 << 16)
#define                 AT_XDMAC_CC_SAM_UBS_DS_AM       (0x3 << 16)
#define         AT_XDMAC_CC_DAM_MASK    (0x3 << 18)     /* Channel Destination Addressing Mode */
#define                 AT_XDMAC_CC_DAM_FIXED_AM        (0x0 << 18)
#define                 AT_XDMAC_CC_DAM_INCREMENTED_AM  (0x1 << 18)
#define                 AT_XDMAC_CC_DAM_UBS_AM          (0x2 << 18)
#define                 AT_XDMAC_CC_DAM_UBS_DS_AM       (0x3 << 18)
#define         AT_XDMAC_CC_INITD       (0x1 << 21)     /* Channel Initialization Terminated (read only) */
#define                 AT_XDMAC_CC_INITD_TERMINATED    (0x0 << 21)
#define                 AT_XDMAC_CC_INITD_IN_PROGRESS   (0x1 << 21)
#define         AT_XDMAC_CC_RDIP        (0x1 << 22)     /* Read in Progress (read only) */
#define                 AT_XDMAC_CC_RDIP_DONE           (0x0 << 22)
#define                 AT_XDMAC_CC_RDIP_IN_PROGRESS    (0x1 << 22)
#define         AT_XDMAC_CC_WRIP        (0x1 << 23)     /* Write in Progress (read only) */
#define                 AT_XDMAC_CC_WRIP_DONE           (0x0 << 23)
#define                 AT_XDMAC_CC_WRIP_IN_PROGRESS    (0x1 << 23)
#define         AT_XDMAC_CC_PERID(i)    ((0x7f & (i)) << 24)    /* Channel Peripheral Identifier */
#define AT_XDMAC_CDS_MSP        0x2C    /* Channel Data Stride Memory Set Pattern */
#define AT_XDMAC_CSUS           0x30    /* Channel Source Microblock Stride */
#define AT_XDMAC_CDUS           0x34    /* Channel Destination Microblock Stride */

/* Microblock control members */
#define AT_XDMAC_MBR_UBC_UBLEN_MAX      0xFFFFFFUL      /* Maximum Microblock Length */
#define AT_XDMAC_MBR_UBC_NDE            (0x1 << 24)     /* Next Descriptor Enable */
#define AT_XDMAC_MBR_UBC_NSEN           (0x1 << 25)     /* Next Descriptor Source Update */
#define AT_XDMAC_MBR_UBC_NDEN           (0x1 << 26)     /* Next Descriptor Destination Update */
#define AT_XDMAC_MBR_UBC_NDV0           (0x0 << 27)     /* Next Descriptor View 0 */
#define AT_XDMAC_MBR_UBC_NDV1           (0x1 << 27)     /* Next Descriptor View 1 */
#define AT_XDMAC_MBR_UBC_NDV2           (0x2 << 27)     /* Next Descriptor View 2 */
#define AT_XDMAC_MBR_UBC_NDV3           (0x3 << 27)     /* Next Descriptor View 3 */

#define AT_XDMAC_MAX_CHAN       0x20
#define AT_XDMAC_MAX_CSIZE      16      /* 16 data per chunk */
#define AT_XDMAC_MAX_DWIDTH     8       /* 64 bits */
#define AT_XDMAC_RESIDUE_MAX_RETRIES    5

#define AT_XDMAC_DMA_BUSWIDTHS\
        (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
        BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
        BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
        BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
        BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))

enum atc_status {
        AT_XDMAC_CHAN_IS_CYCLIC = 0,
        AT_XDMAC_CHAN_IS_PAUSED,
        AT_XDMAC_CHAN_IS_PAUSED_INTERNAL,
};

struct at_xdmac_layout {
        /* Global Channel Read Suspend Register */
        u8                              grs;
        /* Global Write Suspend Register */
        u8                              gws;
        /* Global Channel Read Write Suspend Register */
        u8                              grws;
        /* Global Channel Read Write Resume Register */
        u8                              grwr;
        /* Global Channel Software Request Register */
        u8                              gswr;
        /* Global Channel Software Request Status Register */
        u8                              gsws;
        /* Global Channel Software Flush Request Register */
        u8                              gswf;
        /* Channel reg base */
        u8                              chan_cc_reg_base;
        /* Source/Destination Interface must be specified or not */
        bool                            sdif;
        /* AXI queue priority configuration supported */
        bool                            axi_config;
};

/* ----- Channels ----- */
struct at_xdmac_chan {
        struct dma_chan                 chan;
        void __iomem                    *ch_regs;
        u32                             mask;           /* Channel Mask */
        u32                             cfg;            /* Channel Configuration Register */
        u8                              perid;          /* Peripheral ID */
        u8                              perif;          /* Peripheral Interface */
        u8                              memif;          /* Memory Interface */
        u32                             save_cc;
        u32                             save_cim;
        u32                             save_cnda;
        u32                             save_cndc;
        u32                             irq_status;
        unsigned long                   status;
        struct tasklet_struct           tasklet;
        struct dma_slave_config         sconfig;

        spinlock_t                      lock;

        struct list_head                xfers_list;
        struct list_head                free_descs_list;
};
/* ----- Controller ----- */
struct at_xdmac {
        struct dma_device       dma;
        void __iomem            *regs;
        struct device           *dev;
        int                     irq;
        struct clk              *clk;
        u32                     save_gim;
        u32                     save_gs;
        struct dma_pool         *at_xdmac_desc_pool;
        const struct at_xdmac_layout    *layout;
        struct at_xdmac_chan    chan[];
};
/* ----- Descriptors ----- */

/* Linked List Descriptor */
struct at_xdmac_lld {
        u32 mbr_nda;    /* Next Descriptor Member */
        u32 mbr_ubc;    /* Microblock Control Member */
        u32 mbr_sa;     /* Source Address Member */
        u32 mbr_da;     /* Destination Address Member */
        u32 mbr_cfg;    /* Configuration Register */
        u32 mbr_bc;     /* Block Control Register */
        u32 mbr_ds;     /* Data Stride Register */
        u32 mbr_sus;    /* Source Microblock Stride Register */
        u32 mbr_dus;    /* Destination Microblock Stride Register */
};

/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
struct at_xdmac_desc {
        struct at_xdmac_lld             lld;
        enum dma_transfer_direction     direction;
        struct dma_async_tx_descriptor  tx_dma_desc;
        struct list_head                desc_node;
        /* Following members are only used by the first descriptor */
        bool                            active_xfer;
        unsigned int                    xfer_size;
        struct list_head                descs_list;
        struct list_head                xfer_node;
} __aligned(sizeof(u64));

static const struct at_xdmac_layout at_xdmac_sama5d4_layout = {
        .grs = 0x28,
        .gws = 0x2C,
        .grws = 0x30,
        .grwr = 0x34,
        .gswr = 0x38,
        .gsws = 0x3C,
        .gswf = 0x40,
        .chan_cc_reg_base = 0x50,
        .sdif = true,
        .axi_config = false,
};

static const struct at_xdmac_layout at_xdmac_sama7g5_layout = {
        .grs = 0x30,
        .gws = 0x38,
        .grws = 0x40,
        .grwr = 0x44,
        .gswr = 0x48,
        .gsws = 0x4C,
        .gswf = 0x50,
        .chan_cc_reg_base = 0x60,
        .sdif = false,
        .axi_config = true,
};

static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
{
        return atxdmac->regs + (atxdmac->layout->chan_cc_reg_base + chan_nb * 0x40);
}
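
/*
 * Worked example: with the sama5d4 layout above (chan_cc_reg_base = 0x50),
 * the registers of channel 2 start at offset 0x50 + 2 * 0x40 = 0xd0 from
 * the controller base.
 */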

#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
#define at_xdmac_write(atxdmac, reg, value) \
        writel_relaxed((value), (atxdmac)->regs + (reg))

#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))
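
/*
 * Example (illustrative only): enabling a channel touches both register
 * spaces through these accessors, roughly as at_xdmac_start_xfer() does
 * below:
 *
 *      at_xdmac_chan_write(atchan, AT_XDMAC_CIE, AT_XDMAC_CIE_LIE);
 *      at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
 *
 * The relaxed accessors are intentional; an explicit wmb() is issued where
 * ordering against the channel enable matters.
 */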

static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
{
        return container_of(dchan, struct at_xdmac_chan, chan);
}

static struct device *chan2dev(struct dma_chan *chan)
{
        return &chan->dev->device;
}

static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
{
        return container_of(ddev, struct at_xdmac, dma);
}

static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
{
        return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
}

static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
{
        return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
}

static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
{
        return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
}

static inline int at_xdmac_chan_is_paused_internal(struct at_xdmac_chan *atchan)
{
        return test_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
}

static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg)
{
        return cfg & AT_XDMAC_CC_TYPE_PER_TRAN;
}

static inline u8 at_xdmac_get_dwidth(u32 cfg)
{
        return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
}
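
/*
 * Worked example: for a cfg with AT_XDMAC_CC_DWIDTH(AT_XDMAC_CC_DWIDTH_WORD)
 * set, at_xdmac_get_dwidth() returns 2, i.e. each data is 1 << 2 = 4 bytes
 * wide.
 */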

static unsigned int init_nr_desc_per_channel = 64;
module_param(init_nr_desc_per_channel, uint, 0644);
MODULE_PARM_DESC(init_nr_desc_per_channel,
                 "initial descriptors per channel (default: 64)");
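/*
 * Example: if the driver is built as a module, the pool size can be tuned
 * at load time, e.g. "modprobe at_xdmac init_nr_desc_per_channel=128"; the
 * 0644 permissions also expose it through
 * /sys/module/at_xdmac/parameters/init_nr_desc_per_channel, affecting only
 * channels allocated afterwards.
 */
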
static void at_xdmac_runtime_suspend_descriptors(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        struct at_xdmac_desc    *desc, *_desc;

        list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
                if (!desc->active_xfer)
                        continue;

                pm_runtime_put_autosuspend(atxdmac->dev);
        }
}

static int at_xdmac_runtime_resume_descriptors(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        struct at_xdmac_desc    *desc, *_desc;
        int                     ret;

        list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
                if (!desc->active_xfer)
                        continue;

                ret = pm_runtime_resume_and_get(atxdmac->dev);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        int                     ret;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return false;

        ret = !!(at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask);

        pm_runtime_put_autosuspend(atxdmac->dev);

        return ret;
}

static void at_xdmac_off(struct at_xdmac *atxdmac, bool suspend_descriptors)
{
        struct dma_chan         *chan, *_chan;
        struct at_xdmac_chan    *atchan;
        int                     ret;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return;

        at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);

        /* Wait until all channels are disabled. */
        while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
                cpu_relax();

        at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);

        /* Decrement runtime PM ref counter for each active descriptor. */
        if (!list_empty(&atxdmac->dma.channels) && suspend_descriptors) {
                list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels,
                                         device_node) {
                        atchan = to_at_xdmac_chan(chan);
                        at_xdmac_runtime_suspend_descriptors(atchan);
                }
        }

        pm_runtime_put_autosuspend(atxdmac->dev);
}

/* Must be called with the channel lock held. */
static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
                                struct at_xdmac_desc *first)
{
        struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
        u32             reg;
        int             ret;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return;

        dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);

        /* Set transfer as active to not try to start it again. */
        first->active_xfer = true;

        /* Tell xdmac where to get the first descriptor. */
        reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys);
        if (atxdmac->layout->sdif)
                reg |= AT_XDMAC_CNDA_NDAIF(atchan->memif);

        at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);

        /*
         * When doing a non-cyclic transfer, we need to use descriptor view
         * 2 or higher, since some fields of the configuration register
         * depend on the transfer size and the src/dest addresses.
         */
        if (at_xdmac_chan_is_cyclic(atchan))
                reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
        else if ((first->lld.mbr_ubc &
                  AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3)
                reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
        else
                reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
        /*
         * Even though the register is updated from the descriptor
         * configuration when using view 2 or higher, the PROT bit won't be
         * set properly: it can only be modified through the channel
         * configuration register.
         */
        at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg);

        reg |= AT_XDMAC_CNDC_NDDUP
               | AT_XDMAC_CNDC_NDSUP
               | AT_XDMAC_CNDC_NDE;
        at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);

        dev_vdbg(chan2dev(&atchan->chan),
                 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
                 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));

        at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
        reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE;
        /*
         * Request Overflow Error is only for peripheral synchronized transfers
         */
        if (at_xdmac_chan_is_peripheral_xfer(first->lld.mbr_cfg))
                reg |= AT_XDMAC_CIE_ROIE;

        /*
         * There is no end of list when doing cyclic DMA, so we need to get
         * an interrupt after each period.
         */
        if (at_xdmac_chan_is_cyclic(atchan))
                at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
                                    reg | AT_XDMAC_CIE_BIE);
        else
                at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
                                    reg | AT_XDMAC_CIE_LIE);
        at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
        dev_vdbg(chan2dev(&atchan->chan),
                 "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
        wmb();
        at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);

        dev_vdbg(chan2dev(&atchan->chan),
                 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
                 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
                 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
}

static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
{
        struct at_xdmac_desc    *desc = txd_to_at_desc(tx);
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(tx->chan);
        dma_cookie_t            cookie;
        unsigned long           irqflags;

        spin_lock_irqsave(&atchan->lock, irqflags);
        cookie = dma_cookie_assign(tx);

        list_add_tail(&desc->xfer_node, &atchan->xfers_list);
        spin_unlock_irqrestore(&atchan->lock, irqflags);

        dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
                 __func__, atchan, desc);

        return cookie;
}
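
/*
 * Illustrative client flow: at_xdmac_tx_submit() is not called directly; a
 * dmaengine consumer reaches it through the generic API, roughly:
 *
 *      desc = dmaengine_prep_slave_sg(chan, sgl, sg_len,
 *                                     DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 *      cookie = dmaengine_submit(desc);
 *      dma_async_issue_pending(chan);
 *
 * dmaengine_submit() ends up here via tx_submit and only queues the
 * descriptor; the transfer itself is started from issue_pending.
 */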

static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
                                                 gfp_t gfp_flags)
{
        struct at_xdmac_desc    *desc;
        struct at_xdmac         *atxdmac = to_at_xdmac(chan->device);
        dma_addr_t              phys;

        desc = dma_pool_zalloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
        if (desc) {
                INIT_LIST_HEAD(&desc->descs_list);
                dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
                desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
                desc->tx_dma_desc.phys = phys;
        }

        return desc;
}

static void at_xdmac_init_used_desc(struct at_xdmac_desc *desc)
{
        memset(&desc->lld, 0, sizeof(desc->lld));
        INIT_LIST_HEAD(&desc->descs_list);
        desc->direction = DMA_TRANS_NONE;
        desc->xfer_size = 0;
        desc->active_xfer = false;
}

/* Caller must hold the channel lock. */
static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
{
        struct at_xdmac_desc *desc;

        if (list_empty(&atchan->free_descs_list)) {
                desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
        } else {
                desc = list_first_entry(&atchan->free_descs_list,
                                        struct at_xdmac_desc, desc_node);
                list_del(&desc->desc_node);
                at_xdmac_init_used_desc(desc);
        }

        return desc;
}

static void at_xdmac_queue_desc(struct dma_chan *chan,
                                struct at_xdmac_desc *prev,
                                struct at_xdmac_desc *desc)
{
        if (!prev || !desc)
                return;

        prev->lld.mbr_nda = desc->tx_dma_desc.phys;
        prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE;

        dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
                __func__, prev, &prev->lld.mbr_nda);
}

static inline void at_xdmac_increment_block_count(struct dma_chan *chan,
                                                  struct at_xdmac_desc *desc)
{
        if (!desc)
                return;

        desc->lld.mbr_bc++;

        dev_dbg(chan2dev(chan),
                "%s: incrementing the block count of the desc 0x%p\n",
                __func__, desc);
}

static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
                                       struct of_dma *of_dma)
{
        struct at_xdmac         *atxdmac = of_dma->of_dma_data;
        struct at_xdmac_chan    *atchan;
        struct dma_chan         *chan;
        struct device           *dev = atxdmac->dma.dev;

        if (dma_spec->args_count != 1) {
                dev_err(dev, "dma phandle args: bad number of args\n");
                return NULL;
        }

        chan = dma_get_any_slave_channel(&atxdmac->dma);
        if (!chan) {
                dev_err(dev, "can't get a dma channel\n");
                return NULL;
        }

        atchan = to_at_xdmac_chan(chan);
        atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
        atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
        atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
        dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
                 atchan->memif, atchan->perif, atchan->perid);

        return chan;
}
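
/*
 * Illustrative consumer binding (made-up phandle and PERID; the real PERID
 * comes from the SoC datasheet): the single cell decoded above is built
 * with the macros from dt-bindings/dma/at91.h, e.g.:
 *
 *      dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
 *                     | AT91_XDMAC_DT_PERID(11))>;
 *      dma-names = "tx";
 */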

static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
                                      enum dma_transfer_direction direction)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        int                     csize, dwidth;

        if (direction == DMA_DEV_TO_MEM) {
                atchan->cfg =
                        AT91_XDMAC_DT_PERID(atchan->perid)
                        | AT_XDMAC_CC_DAM_INCREMENTED_AM
                        | AT_XDMAC_CC_SAM_FIXED_AM
                        | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
                        | AT_XDMAC_CC_DSYNC_PER2MEM
                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                        | AT_XDMAC_CC_TYPE_PER_TRAN;
                if (atxdmac->layout->sdif)
                        atchan->cfg |= AT_XDMAC_CC_DIF(atchan->memif) |
                                       AT_XDMAC_CC_SIF(atchan->perif);

                csize = ffs(atchan->sconfig.src_maxburst) - 1;
                if (csize < 0) {
                        dev_err(chan2dev(chan), "invalid src maxburst value\n");
                        return -EINVAL;
                }
                atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
                dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
                if (dwidth < 0) {
                        dev_err(chan2dev(chan), "invalid src addr width value\n");
                        return -EINVAL;
                }
                atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
        } else if (direction == DMA_MEM_TO_DEV) {
                atchan->cfg =
                        AT91_XDMAC_DT_PERID(atchan->perid)
                        | AT_XDMAC_CC_DAM_FIXED_AM
                        | AT_XDMAC_CC_SAM_INCREMENTED_AM
                        | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
                        | AT_XDMAC_CC_DSYNC_MEM2PER
                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                        | AT_XDMAC_CC_TYPE_PER_TRAN;
                if (atxdmac->layout->sdif)
                        atchan->cfg |= AT_XDMAC_CC_DIF(atchan->perif) |
                                       AT_XDMAC_CC_SIF(atchan->memif);

                csize = ffs(atchan->sconfig.dst_maxburst) - 1;
                if (csize < 0) {
                        dev_err(chan2dev(chan), "invalid dst maxburst value\n");
                        return -EINVAL;
                }
                atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
                dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
                if (dwidth < 0) {
                        dev_err(chan2dev(chan), "invalid dst addr width value\n");
                        return -EINVAL;
                }
                atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
        }

        dev_dbg(chan2dev(chan), "%s: cfg=0x%08x\n", __func__, atchan->cfg);

        return 0;
}
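
/*
 * Worked example: with sconfig.src_maxburst = 16 and sconfig.src_addr_width
 * = DMA_SLAVE_BUSWIDTH_4_BYTES (i.e. 4), the code above computes
 * csize = ffs(16) - 1 = 4 and dwidth = ffs(4) - 1 = 2, i.e. 16-data chunks
 * at word width.
 */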

/*
 * Only check that the maxburst and addr width values are supported by the
 * controller; whether the configuration suits the transfer cannot be
 * validated here since the direction is not known at this stage.
 */
static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
{
        if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
            || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
                return -EINVAL;

        if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
            || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
                return -EINVAL;

        return 0;
}

static int at_xdmac_set_slave_config(struct dma_chan *chan,
                                      struct dma_slave_config *sconfig)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);

        if (at_xdmac_check_slave_config(sconfig)) {
                dev_err(chan2dev(chan), "invalid slave configuration\n");
                return -EINVAL;
        }

        memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));

        return 0;
}
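
/*
 * Illustrative consumer call (phys_fifo_addr is a made-up peripheral FIFO
 * address):
 *
 *      struct dma_slave_config cfg = {
 *              .direction = DMA_MEM_TO_DEV,
 *              .dst_addr = phys_fifo_addr,
 *              .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
 *              .dst_maxburst = 16,
 *      };
 *      ret = dmaengine_slave_config(chan, &cfg);
 */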

static struct dma_async_tx_descriptor *
at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                       unsigned int sg_len, enum dma_transfer_direction direction,
                       unsigned long flags, void *context)
{
        struct at_xdmac_chan            *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc            *first = NULL, *prev = NULL;
        struct scatterlist              *sg;
        int                             i;
        unsigned int                    xfer_size = 0;
        unsigned long                   irqflags;
        struct dma_async_tx_descriptor  *ret = NULL;

        if (!sgl)
                return NULL;

        if (!is_slave_direction(direction)) {
                dev_err(chan2dev(chan), "invalid DMA direction\n");
                return NULL;
        }

        dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
                 __func__, sg_len,
                 direction == DMA_MEM_TO_DEV ? "to device" : "from device",
                 flags);

        /* Protect dma_sconfig field that can be modified by set_slave_conf. */
        spin_lock_irqsave(&atchan->lock, irqflags);

        if (at_xdmac_compute_chan_conf(chan, direction))
                goto spin_unlock;

        /* Prepare descriptors. */
        for_each_sg(sgl, sg, sg_len, i) {
                struct at_xdmac_desc    *desc = NULL;
                u32                     len, mem, dwidth, fixed_dwidth;

                len = sg_dma_len(sg);
                mem = sg_dma_address(sg);
                if (unlikely(!len)) {
                        dev_err(chan2dev(chan), "sg data length is zero\n");
                        goto spin_unlock;
                }
                dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
                         __func__, i, len, mem);

                desc = at_xdmac_get_desc(atchan);
                if (!desc) {
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
                                list_splice_tail_init(&first->descs_list,
                                                      &atchan->free_descs_list);
                        goto spin_unlock;
                }

                /* Linked list descriptor setup. */
                if (direction == DMA_DEV_TO_MEM) {
                        desc->lld.mbr_sa = atchan->sconfig.src_addr;
                        desc->lld.mbr_da = mem;
                } else {
                        desc->lld.mbr_sa = mem;
                        desc->lld.mbr_da = atchan->sconfig.dst_addr;
                }
                dwidth = at_xdmac_get_dwidth(atchan->cfg);
                fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
                               ? dwidth
                               : AT_XDMAC_CC_DWIDTH_BYTE;
                desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2                       /* next descriptor view */
                        | AT_XDMAC_MBR_UBC_NDEN                                 /* next descriptor dst parameter update */
                        | AT_XDMAC_MBR_UBC_NSEN                                 /* next descriptor src parameter update */
                        | (len >> fixed_dwidth);                                /* microblock length */
                desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) |
                                    AT_XDMAC_CC_DWIDTH(fixed_dwidth);
                dev_dbg(chan2dev(chan),
                         "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
                         __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);

                /* Chain lld. */
                if (prev)
                        at_xdmac_queue_desc(chan, prev, desc);

                prev = desc;
                if (!first)
                        first = desc;

                dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
                         __func__, desc, first);
                list_add_tail(&desc->desc_node, &first->descs_list);
                xfer_size += len;
        }
        first->tx_dma_desc.flags = flags;
        first->xfer_size = xfer_size;
        first->direction = direction;
        ret = &first->tx_dma_desc;

spin_unlock:
        spin_unlock_irqrestore(&atchan->lock, irqflags);
        return ret;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
                         size_t buf_len, size_t period_len,
                         enum dma_transfer_direction direction,
                         unsigned long flags)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *first = NULL, *prev = NULL;
        unsigned int            periods = buf_len / period_len;
        int                     i;
        unsigned long           irqflags;

        dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
                __func__, &buf_addr, buf_len, period_len,
                direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);

        if (!is_slave_direction(direction)) {
                dev_err(chan2dev(chan), "invalid DMA direction\n");
                return NULL;
        }

        if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
                dev_err(chan2dev(chan), "channel currently used\n");
                return NULL;
        }

        if (at_xdmac_compute_chan_conf(chan, direction))
                return NULL;

        for (i = 0; i < periods; i++) {
                struct at_xdmac_desc    *desc = NULL;

                spin_lock_irqsave(&atchan->lock, irqflags);
                desc = at_xdmac_get_desc(atchan);
                if (!desc) {
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
                                list_splice_tail_init(&first->descs_list,
                                                      &atchan->free_descs_list);
                        spin_unlock_irqrestore(&atchan->lock, irqflags);
                        return NULL;
                }
                spin_unlock_irqrestore(&atchan->lock, irqflags);
                dev_dbg(chan2dev(chan),
                        "%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
                        __func__, desc, &desc->tx_dma_desc.phys);

                if (direction == DMA_DEV_TO_MEM) {
                        desc->lld.mbr_sa = atchan->sconfig.src_addr;
                        desc->lld.mbr_da = buf_addr + i * period_len;
                } else {
                        desc->lld.mbr_sa = buf_addr + i * period_len;
                        desc->lld.mbr_da = atchan->sconfig.dst_addr;
                }
                desc->lld.mbr_cfg = atchan->cfg;
                desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
                        | AT_XDMAC_MBR_UBC_NDEN
                        | AT_XDMAC_MBR_UBC_NSEN
                        | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg);

                dev_dbg(chan2dev(chan),
                         "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
                         __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);

                /* Chain lld. */
                if (prev)
                        at_xdmac_queue_desc(chan, prev, desc);

                prev = desc;
                if (!first)
                        first = desc;

                dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
                         __func__, desc, first);
                list_add_tail(&desc->desc_node, &first->descs_list);
        }

        at_xdmac_queue_desc(chan, prev, first);
        first->tx_dma_desc.flags = flags;
        first->xfer_size = buf_len;
        first->direction = direction;

        return &first->tx_dma_desc;
}
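
/*
 * Illustrative cyclic request, e.g. a 4-period audio ring buffer (buf_phys
 * and period_len are the caller's values):
 *
 *      desc = dmaengine_prep_dma_cyclic(chan, buf_phys, 4 * period_len,
 *                                       period_len, DMA_MEM_TO_DEV,
 *                                       DMA_PREP_INTERRUPT);
 *
 * This builds four linked descriptors above, with the last chained back to
 * the first, and a block interrupt is raised after each period.
 */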

static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr)
{
        u32 width;

        /*
         * Check address alignment to select the greater data width we
         * can use.
         *
         * Some XDMAC implementations don't provide dword transfer, in
         * this case selecting dword has the same behavior as
         * selecting word transfers.
         */
        if (!(addr & 7)) {
                width = AT_XDMAC_CC_DWIDTH_DWORD;
                dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
        } else if (!(addr & 3)) {
                width = AT_XDMAC_CC_DWIDTH_WORD;
                dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
        } else if (!(addr & 1)) {
                width = AT_XDMAC_CC_DWIDTH_HALFWORD;
                dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
        } else {
                width = AT_XDMAC_CC_DWIDTH_BYTE;
                dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
        }

        return width;
}
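
/*
 * Worked example: at_xdmac_align_width(chan, 0x20001004) returns
 * AT_XDMAC_CC_DWIDTH_WORD since that address is 4-byte but not 8-byte
 * aligned. Callers OR the source address, destination address and length
 * together first, so the result is the widest width all of them support.
 */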

static struct at_xdmac_desc *
at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
                                struct at_xdmac_chan *atchan,
                                struct at_xdmac_desc *prev,
                                dma_addr_t src, dma_addr_t dst,
                                struct dma_interleaved_template *xt,
                                struct data_chunk *chunk)
{
        struct at_xdmac_desc    *desc;
        u32                     dwidth;
        unsigned long           flags;
        size_t                  ublen;
        /*
         * WARNING: The channel configuration is set here since there is no
         * dmaengine_slave_config call in this case. Moreover, we don't know
         * the direction, which means we can't dynamically set the source and
         * dest interfaces, so we have to use the same one for both. Only
         * interface 0 allows EBI access. Fortunately, DDR is accessible
         * through both ports (at least on SAMA5D4x), so using one interface
         * for source and dest works around the unknown direction.
         * ERRATA: Although unused for memory transfers, the PERID must not
         * match that of another channel, otherwise it could lead to spurious
         * flag status.
         * For the SAMA7G5x case, the SIF and DIF fields are no longer used,
         * so there is no need to set the SIF/DIF interfaces here.
         * For SAMA5D4x and SAMA5D2x, SIF and DIF are already configured as
         * zero.
         */
        u32                     chan_cc = AT_XDMAC_CC_PERID(0x7f)
                                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                                        | AT_XDMAC_CC_TYPE_MEM_TRAN;

        dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
        if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
                dev_dbg(chan2dev(chan),
                        "%s: chunk too big (%zu, max size %lu)...\n",
                        __func__, chunk->size,
                        AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
                return NULL;
        }

        if (prev)
                dev_dbg(chan2dev(chan),
                        "Adding items at the end of desc 0x%p\n", prev);

        if (xt->src_inc) {
                if (xt->src_sgl)
                        chan_cc |=  AT_XDMAC_CC_SAM_UBS_AM;
                else
                        chan_cc |=  AT_XDMAC_CC_SAM_INCREMENTED_AM;
        }

        if (xt->dst_inc) {
                if (xt->dst_sgl)
                        chan_cc |=  AT_XDMAC_CC_DAM_UBS_AM;
                else
                        chan_cc |=  AT_XDMAC_CC_DAM_INCREMENTED_AM;
        }

        spin_lock_irqsave(&atchan->lock, flags);
        desc = at_xdmac_get_desc(atchan);
        spin_unlock_irqrestore(&atchan->lock, flags);
        if (!desc) {
                dev_err(chan2dev(chan), "can't get descriptor\n");
                return NULL;
        }

        chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

        ublen = chunk->size >> dwidth;

        desc->lld.mbr_sa = src;
        desc->lld.mbr_da = dst;
        desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk);
        desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk);

        desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
                | AT_XDMAC_MBR_UBC_NDEN
                | AT_XDMAC_MBR_UBC_NSEN
                | ublen;
        desc->lld.mbr_cfg = chan_cc;

        dev_dbg(chan2dev(chan),
                "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
                __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
                desc->lld.mbr_ubc, desc->lld.mbr_cfg);

        /* Chain lld. */
        if (prev)
                at_xdmac_queue_desc(chan, prev, desc);

        return desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_interleaved(struct dma_chan *chan,
                          struct dma_interleaved_template *xt,
                          unsigned long flags)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *prev = NULL, *first = NULL;
        dma_addr_t              dst_addr, src_addr;
        size_t                  src_skip = 0, dst_skip = 0, len = 0;
        struct data_chunk       *chunk;
        int                     i;

        if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM))
                return NULL;

        /*
         * TODO: Handle the case where we have to repeat a chain of
         * descriptors...
         */
        if ((xt->numf > 1) && (xt->frame_size > 1))
                return NULL;

        dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
                __func__, &xt->src_start, &xt->dst_start, xt->numf,
                xt->frame_size, flags);

        src_addr = xt->src_start;
        dst_addr = xt->dst_start;

        if (xt->numf > 1) {
                first = at_xdmac_interleaved_queue_desc(chan, atchan,
                                                        NULL,
                                                        src_addr, dst_addr,
                                                        xt, xt->sgl);
                if (!first)
                        return NULL;

                /* Length of the block is (BLEN+1) microblocks. */
                for (i = 0; i < xt->numf - 1; i++)
                        at_xdmac_increment_block_count(chan, first);

                dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
                        __func__, first, first);
                list_add_tail(&first->desc_node, &first->descs_list);
        } else {
                for (i = 0; i < xt->frame_size; i++) {
                        size_t src_icg = 0, dst_icg = 0;
                        struct at_xdmac_desc *desc;

                        chunk = xt->sgl + i;

                        dst_icg = dmaengine_get_dst_icg(xt, chunk);
                        src_icg = dmaengine_get_src_icg(xt, chunk);

                        src_skip = chunk->size + src_icg;
                        dst_skip = chunk->size + dst_icg;

                        dev_dbg(chan2dev(chan),
                                "%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n",
                                __func__, chunk->size, src_icg, dst_icg);

                        desc = at_xdmac_interleaved_queue_desc(chan, atchan,
                                                               prev,
                                                               src_addr, dst_addr,
                                                               xt, chunk);
                        if (!desc) {
                                if (first)
                                        list_splice_tail_init(&first->descs_list,
                                                              &atchan->free_descs_list);
                                return NULL;
                        }

                        if (!first)
                                first = desc;

                        dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
                                __func__, desc, first);
                        list_add_tail(&desc->desc_node, &first->descs_list);

                        if (xt->src_sgl)
                                src_addr += src_skip;

                        if (xt->dst_sgl)
                                dst_addr += dst_skip;

                        len += chunk->size;
                        prev = desc;
                }
        }

        first->tx_dma_desc.cookie = -EBUSY;
        first->tx_dma_desc.flags = flags;
        first->xfer_size = len;

        return &first->tx_dma_desc;
}
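
/*
 * Illustrative template for the single-frame case above: copying two
 * 64-byte chunks while skipping 16 bytes after each chunk on the
 * destination side (allocation of xt with room for two chunks omitted):
 *
 *      xt->dir = DMA_MEM_TO_MEM;
 *      xt->numf = 1;
 *      xt->frame_size = 2;
 *      xt->src_inc = true;
 *      xt->dst_inc = true;
 *      xt->dst_sgl = true;
 *      xt->sgl[0] = (struct data_chunk){ .size = 64, .icg = 16 };
 *      xt->sgl[1] = (struct data_chunk){ .size = 64, .icg = 16 };
 *      desc = dmaengine_prep_interleaved_dma(chan, xt, 0);
 */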

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                         size_t len, unsigned long flags)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *first = NULL, *prev = NULL;
        size_t                  remaining_size = len, xfer_size = 0, ublen;
        dma_addr_t              src_addr = src, dst_addr = dest;
        u32                     dwidth;
        /*
         * WARNING: We don't know the direction, which means we can't
         * dynamically set the source and dest interfaces, so we have to use
         * the same one for both. Only interface 0 allows EBI access.
         * Fortunately, DDR is accessible through both ports (at least on
         * SAMA5D4x), so using one interface for source and dest works around
         * the unknown direction.
         * ERRATA: Although unused for memory transfers, the PERID must not
         * match that of another channel, otherwise it could lead to spurious
         * flag status.
         * For the SAMA7G5x case, the SIF and DIF fields are no longer used,
         * so there is no need to set the SIF/DIF interfaces here.
         * For SAMA5D4x and SAMA5D2x, SIF and DIF are already configured as
         * zero.
         */
        u32                     chan_cc = AT_XDMAC_CC_PERID(0x7f)
                                        | AT_XDMAC_CC_DAM_INCREMENTED_AM
                                        | AT_XDMAC_CC_SAM_INCREMENTED_AM
                                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                                        | AT_XDMAC_CC_TYPE_MEM_TRAN;
        unsigned long           irqflags;

        dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
                __func__, &src, &dest, len, flags);

        if (unlikely(!len))
                return NULL;

        dwidth = at_xdmac_align_width(chan, src_addr | dst_addr);

        /* Prepare descriptors. */
        while (remaining_size) {
                struct at_xdmac_desc    *desc = NULL;

                dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);

                spin_lock_irqsave(&atchan->lock, irqflags);
                desc = at_xdmac_get_desc(atchan);
                spin_unlock_irqrestore(&atchan->lock, irqflags);
                if (!desc) {
                        dev_err(chan2dev(chan), "can't get descriptor\n");
                        if (first)
                                list_splice_tail_init(&first->descs_list,
                                                      &atchan->free_descs_list);
                        return NULL;
                }

                /* Update src and dest addresses. */
                src_addr += xfer_size;
                dst_addr += xfer_size;

                if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
                        xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
                else
                        xfer_size = remaining_size;

                dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);

                /* Check remaining length and change data width if needed. */
                dwidth = at_xdmac_align_width(chan,
                                              src_addr | dst_addr | xfer_size);
                chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK;
                chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

                ublen = xfer_size >> dwidth;
                remaining_size -= xfer_size;

                desc->lld.mbr_sa = src_addr;
                desc->lld.mbr_da = dst_addr;
                desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
                        | AT_XDMAC_MBR_UBC_NDEN
                        | AT_XDMAC_MBR_UBC_NSEN
                        | ublen;
                desc->lld.mbr_cfg = chan_cc;

                dev_dbg(chan2dev(chan),
                         "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
                         __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);

                /* Chain lld. */
                if (prev)
                        at_xdmac_queue_desc(chan, prev, desc);

                prev = desc;
                if (!first)
                        first = desc;

                dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
                         __func__, desc, first);
                list_add_tail(&desc->desc_node, &first->descs_list);
        }

        first->tx_dma_desc.flags = flags;
        first->xfer_size = len;

        return &first->tx_dma_desc;
}
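
/*
 * Worked example: with both addresses 8-byte aligned, dwidth starts as
 * AT_XDMAC_CC_DWIDTH_DWORD and a single descriptor covers up to
 * AT_XDMAC_MBR_UBC_UBLEN_MAX << 3 bytes (128 MiB - 8); anything larger is
 * split across several chained descriptors by the loop above.
 */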

static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
                                                         struct at_xdmac_chan *atchan,
                                                         dma_addr_t dst_addr,
                                                         size_t len,
                                                         int value)
{
        struct at_xdmac_desc    *desc;
        unsigned long           flags;
        size_t                  ublen;
        u32                     dwidth;
        u8                      pattern;
        /*
         * WARNING: The channel configuration is set here since there is no
         * dmaengine_slave_config call in this case. Moreover, we don't know
         * the direction, which means we can't dynamically set the source and
         * dest interfaces, so we have to use the same one for both. Only
         * interface 0 allows EBI access. Fortunately, DDR is accessible
         * through both ports (at least on SAMA5D4x), so using one interface
         * for source and dest works around the unknown direction.
         * ERRATA: Although unused for memory transfers, the PERID must not
         * match that of another channel, otherwise it could lead to spurious
         * flag status.
         * For the SAMA7G5x case, the SIF and DIF fields are no longer used,
         * so there is no need to set the SIF/DIF interfaces here.
         * For SAMA5D4x and SAMA5D2x, SIF and DIF are already configured as
         * zero.
         */
        u32                     chan_cc = AT_XDMAC_CC_PERID(0x7f)
                                        | AT_XDMAC_CC_DAM_UBS_AM
                                        | AT_XDMAC_CC_SAM_INCREMENTED_AM
                                        | AT_XDMAC_CC_MBSIZE_SIXTEEN
                                        | AT_XDMAC_CC_MEMSET_HW_MODE
                                        | AT_XDMAC_CC_TYPE_MEM_TRAN;

        dwidth = at_xdmac_align_width(chan, dst_addr);

        if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
                dev_err(chan2dev(chan),
                        "%s: Transfer too large, aborting...\n",
                        __func__);
                return NULL;
        }

        spin_lock_irqsave(&atchan->lock, flags);
        desc = at_xdmac_get_desc(atchan);
        spin_unlock_irqrestore(&atchan->lock, flags);
        if (!desc) {
                dev_err(chan2dev(chan), "can't get descriptor\n");
                return NULL;
        }

        chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);

        /*
         * Per the dmaengine API, only the first byte of value is used; it
         * is replicated across the 32-bit pattern below.
         */
        pattern = (u8)value;

        ublen = len >> dwidth;

        desc->lld.mbr_da = dst_addr;
        desc->lld.mbr_ds = (pattern << 24) |
                           (pattern << 16) |
                           (pattern << 8) |
                           pattern;
        desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
                | AT_XDMAC_MBR_UBC_NDEN
                | AT_XDMAC_MBR_UBC_NSEN
                | ublen;
        desc->lld.mbr_cfg = chan_cc;

        dev_dbg(chan2dev(chan),
                "%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
                __func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
                desc->lld.mbr_cfg);

        return desc;
}

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
                         size_t len, unsigned long flags)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *desc;

        dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n",
                __func__, &dest, len, value, flags);

        if (unlikely(!len))
                return NULL;

        desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value);
        if (!desc)
                return NULL;
        list_add_tail(&desc->desc_node, &desc->descs_list);

        desc->tx_dma_desc.cookie = -EBUSY;
        desc->tx_dma_desc.flags = flags;
        desc->xfer_size = len;

        return &desc->tx_dma_desc;
}
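
/*
 * Illustrative sketch, not part of the driver: filling a DMA-mapped buffer
 * with a one-byte pattern through the memset capability above. Only the
 * least significant byte of the value is used, as noted in
 * at_xdmac_memset_create_desc(); 0xAB is an arbitrary example pattern.
 */
static int __maybe_unused at_xdmac_example_memset(struct dma_chan *chan,
                                                  dma_addr_t dst, size_t len)
{
        struct dma_async_tx_descriptor *tx;

        tx = dmaengine_prep_dma_memset(chan, dst, 0xAB, len,
                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!tx)
                return -ENOMEM;

        if (dma_submit_error(dmaengine_submit(tx)))
                return -EINVAL;

        dma_async_issue_pending(chan);
        return 0;
}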

static struct dma_async_tx_descriptor *
at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
                            unsigned int sg_len, int value,
                            unsigned long flags)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *desc, *pdesc = NULL,
                                *ppdesc = NULL, *first = NULL;
        struct scatterlist      *sg, *psg = NULL, *ppsg = NULL;
        size_t                  stride = 0, pstride = 0, len = 0;
        int                     i;

        if (!sgl)
                return NULL;

        dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n",
                __func__, sg_len, value, flags);

        /* Prepare descriptors. */
        for_each_sg(sgl, sg, sg_len, i) {
                dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
                        __func__, &sg_dma_address(sg), sg_dma_len(sg),
                        value, flags);
                desc = at_xdmac_memset_create_desc(chan, atchan,
                                                   sg_dma_address(sg),
                                                   sg_dma_len(sg),
                                                   value);
                if (!desc) {
                        if (first)
                                list_splice_tail_init(&first->descs_list,
                                                      &atchan->free_descs_list);
                        return NULL;
                }

                if (!first)
                        first = desc;

                /* Update our strides */
                pstride = stride;
                if (psg)
                        stride = sg_dma_address(sg) -
                                (sg_dma_address(psg) + sg_dma_len(psg));

                /*
                 * The scatterlist API gives us only the address and
                 * length of each element.
                 *
                 * Unfortunately, it does not give us the stride, which
                 * we will need to compute.
                 *
                 * That leaves us with a layout like this one:
                 *    len    stride    len    stride    len
                 * +-------+        +-------+        +-------+
                 * |  N-2  |        |  N-1  |        |   N   |
                 * +-------+        +-------+        +-------+
                 *
                 * We need all three of these elements (N-2, N-1 and N)
                 * to decide whether to queue N-1 or reuse N-2.
                 *
                 * We will only consider N if it is the last element.
                 */
                if (ppdesc && pdesc) {
                        if ((stride == pstride) &&
                            (sg_dma_len(ppsg) == sg_dma_len(psg))) {
                                dev_dbg(chan2dev(chan),
                                        "%s: desc 0x%p can be merged with desc 0x%p\n",
                                        __func__, pdesc, ppdesc);

                                /*
                                 * Increment the block count of the
                                 * N-2 descriptor
                                 */
                                at_xdmac_increment_block_count(chan, ppdesc);
                                ppdesc->lld.mbr_dus = stride;

                                /*
                                 * Put back the N-1 descriptor in the
                                 * free descriptor list
                                 */
                                list_add_tail(&pdesc->desc_node,
                                              &atchan->free_descs_list);

                                /*
                                 * Make our N-1 descriptor pointer
                                 * point to the N-2 since they were
                                 * actually merged.
                                 */
                                pdesc = ppdesc;

                        /*
                         * Rule out the case where we don't have a
                         * pstride computed yet (i.e. our second sg
                         * element).
                         *
                         * We also want to catch the case where the
                         * stride would be negative.
                         */
                        } else if (pstride ||
                                   sg_dma_address(sg) < sg_dma_address(psg)) {
                                /*
                                 * Queue the N-1 descriptor after the
                                 * N-2
                                 */
                                at_xdmac_queue_desc(chan, ppdesc, pdesc);

                                /*
                                 * Add the N-1 descriptor to the list
                                 * of the descriptors used for this
                                 * transfer
                                 */
                                list_add_tail(&desc->desc_node,
                                              &first->descs_list);
                                dev_dbg(chan2dev(chan),
                                        "%s: add desc 0x%p to descs_list 0x%p\n",
                                        __func__, desc, first);
                        }
                }

                /*
                 * If we are the last element, just check whether we have
                 * the same size as the previous element.
                 *
                 * If so, we can merge it with the previous descriptor
                 * since we don't care about the stride anymore.
                 */
                if ((i == (sg_len - 1)) && psg &&
                    sg_dma_len(psg) == sg_dma_len(sg)) {
                        dev_dbg(chan2dev(chan),
                                "%s: desc 0x%p can be merged with desc 0x%p\n",
                                __func__, desc, pdesc);

                        /*
                         * Increment the block count of the N-1
                         * descriptor
                         */
                        at_xdmac_increment_block_count(chan, pdesc);
                        pdesc->lld.mbr_dus = stride;

                        /*
                         * Put back the N descriptor in the free
                         * descriptor list
                         */
                        list_add_tail(&desc->desc_node,
                                      &atchan->free_descs_list);
                }

                /* Update our descriptors */
                ppdesc = pdesc;
                pdesc = desc;

                /* Update our scatter pointers */
                ppsg = psg;
                psg = sg;

                len += sg_dma_len(sg);
        }

        first->tx_dma_desc.cookie = -EBUSY;
        first->tx_dma_desc.flags = flags;
        first->xfer_size = len;

        return &first->tx_dma_desc;
}
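
/*
 * Illustrative sketch, not part of the driver: a scattered memset through
 * the device_prep_dma_memset_sg operation registered below. As far as we
 * know there is no generic dmaengine_prep_*() wrapper for DMA_MEMSET_SG,
 * so the device hook is called directly; the scatterlist is assumed to be
 * DMA-mapped by the caller.
 */
static int __maybe_unused at_xdmac_example_memset_sg(struct dma_chan *chan,
                                                     struct scatterlist *sgl,
                                                     unsigned int sg_len,
                                                     int value)
{
        struct dma_async_tx_descriptor *tx;

        if (!dma_has_cap(DMA_MEMSET_SG, chan->device->cap_mask))
                return -EOPNOTSUPP;

        tx = chan->device->device_prep_dma_memset_sg(chan, sgl, sg_len,
                                                     value, DMA_PREP_INTERRUPT);
        if (!tx)
                return -ENOMEM;

        if (dma_submit_error(dmaengine_submit(tx)))
                return -EINVAL;

        dma_async_issue_pending(chan);
        return 0;
}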

static enum dma_status
at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                   struct dma_tx_state *txstate)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        struct at_xdmac_desc    *desc, *_desc, *iter;
        struct list_head        *descs_list;
        enum dma_status         ret;
        int                     residue, retry, pm_status;
        u32                     cur_nda, check_nda, cur_ubc, mask, value;
        u8                      dwidth = 0;
        unsigned long           flags;
        bool                    initd;

        ret = dma_cookie_status(chan, cookie, txstate);
        if (ret == DMA_COMPLETE || !txstate)
                return ret;

        pm_status = pm_runtime_resume_and_get(atxdmac->dev);
        if (pm_status < 0)
                return DMA_ERROR;

        spin_lock_irqsave(&atchan->lock, flags);

        desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);

        /*
         * If the transfer has not been started yet, there is no need to
         * compute the residue: it is simply the transfer length.
         */
        if (!desc->active_xfer) {
                dma_set_residue(txstate, desc->xfer_size);
                goto spin_unlock;
        }

        residue = desc->xfer_size;
        /*
         * Flush the FIFO: only relevant when the transfer is source
         * peripheral synchronized. The flush is needed before reading CUBC
         * because data in the FIFO are not reported by CUBC. Reporting a
         * residue of the transfer length while data sit in the FIFO can
         * cause issues.
         * Use case: the Atmel USART has a timeout which fires when
         * characters have been received but none arrives for a while. On
         * timeout, it requests the residue. If the data are still in the
         * DMA FIFO, we would return a residue of the transfer length,
         * meaning no data were received. An application waiting for these
         * data would then hang, since no further USART timeout occurs
         * without new incoming data.
         */
        mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
        value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
        if ((desc->lld.mbr_cfg & mask) == value) {
                at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
                while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
                        cpu_relax();
        }

        /*
         * The easiest way to compute the residue would be to pause the DMA,
         * but doing so could lead to missing some data, as some devices
         * don't have a FIFO.
         * We need to read several registers because:
         * - the DMA is running, therefore a descriptor change is possible
         * while reading these registers
         * - when a block transfer is done, the value of the CUBC register
         * is set to its initial value until the fetch of the next
         * descriptor. This value would corrupt the residue calculation, so
         * we have to skip it.
         *
         * INITD --------                    ------------
         *              |____________________|
         *       _______________________  _______________
         * NDA       @desc2             \/   @desc3
         *       _______________________/\_______________
         *       __________  ___________  _______________
         * CUBC       0    \/ MAX desc1 \/  MAX desc2
         *       __________/\___________/\_______________
         *
         * Since descriptors are aligned on 64 bits, we can assume that
         * the updates of NDA and CUBC are atomic.
         * Memory barriers are used to ensure the read order of the
         * registers. A maximum number of retries is set because, although
         * unlikely, the loop could otherwise never end.
         */
        for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
                check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
                rmb();
                cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
                rmb();
                initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
                rmb();
                cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
                rmb();

                if ((check_nda == cur_nda) && initd)
                        break;
        }

        if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
                ret = DMA_ERROR;
                goto spin_unlock;
        }

        /*
         * Flush FIFO: only relevant when the transfer is source peripheral
         * synchronized. Another flush is needed here because CUBC is updated
         * when the controller sends the data write command. It can lead to
         * report data that are not written in the memory or the device. The
         * FIFO flush ensures that data are really written.
         */
        if ((desc->lld.mbr_cfg & mask) == value) {
                at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
                while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
                        cpu_relax();
        }

        /*
         * Remove the size of all microblocks already transferred, including
         * the current one, then add back the remaining size of the current
         * microblock.
         */
        descs_list = &desc->descs_list;
        list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
                dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
                residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
                if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
                        desc = iter;
                        break;
                }
        }
        residue += cur_ubc << dwidth;

        dma_set_residue(txstate, residue);

        dev_dbg(chan2dev(chan),
                 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
                 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);

spin_unlock:
        spin_unlock_irqrestore(&atchan->lock, flags);
        pm_runtime_put_autosuspend(atxdmac->dev);
        return ret;
}
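
/*
 * Illustrative sketch, not part of the driver: how a consumer typically
 * retrieves the residue computed by at_xdmac_tx_status() above, e.g. a
 * serial driver working out how many bytes actually landed in its DMA
 * buffer.
 */
static size_t __maybe_unused at_xdmac_example_residue(struct dma_chan *chan,
                                                      dma_cookie_t cookie,
                                                      size_t xfer_len)
{
        struct dma_tx_state state;
        enum dma_status status;

        status = dmaengine_tx_status(chan, cookie, &state);
        if (status == DMA_ERROR)
                return 0;

        /* Bytes transferred so far = total length minus residue. */
        return xfer_len - state.residue;
}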

static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
{
        struct at_xdmac_desc    *desc;

        /*
         * If the channel is enabled, do nothing: advance_work will be
         * triggered again after the interrupt.
         */
        if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list))
                return;

        desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
                                xfer_node);
        dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
        if (!desc->active_xfer)
                at_xdmac_start_xfer(atchan, desc);
}

static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
{
        struct at_xdmac_desc            *desc;
        struct dma_async_tx_descriptor  *txd;

        spin_lock_irq(&atchan->lock);
        dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
                __func__, atchan->irq_status);
        if (list_empty(&atchan->xfers_list)) {
                spin_unlock_irq(&atchan->lock);
                return;
        }
        desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
                                xfer_node);
        spin_unlock_irq(&atchan->lock);
        txd = &desc->tx_dma_desc;
        if (txd->flags & DMA_PREP_INTERRUPT)
                dmaengine_desc_get_callback_invoke(txd, NULL);
}
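
/*
 * Illustrative sketch, not part of the driver: a cyclic (ring buffer)
 * transfer whose per-period callback is ultimately invoked by
 * at_xdmac_handle_cyclic() above. The buffer layout is hypothetical.
 */
static void __maybe_unused at_xdmac_example_period_done(void *param)
{
        /* Runs once per elapsed period, from the channel tasklet. */
}

static int __maybe_unused at_xdmac_example_cyclic(struct dma_chan *chan,
                                                  dma_addr_t buf,
                                                  size_t buf_len,
                                                  size_t period_len)
{
        struct dma_async_tx_descriptor *tx;

        tx = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
                                       DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
        if (!tx)
                return -EINVAL;

        tx->callback = at_xdmac_example_period_done;
        tx->callback_param = NULL;

        dmaengine_submit(tx);
        dma_async_issue_pending(chan);
        return 0;
}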

/* Called with atchan->lock held. */
static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        struct at_xdmac_desc    *bad_desc;
        int                     ret;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return;

        /*
         * The descriptor currently at the head of the active list is
         * broken. Since we don't have any way to report errors, we'll
         * just have to scream loudly and try to continue with other
         * descriptors queued (if any).
         */
        if (atchan->irq_status & AT_XDMAC_CIS_RBEIS)
                dev_err(chan2dev(&atchan->chan), "read bus error!!!");
        if (atchan->irq_status & AT_XDMAC_CIS_WBEIS)
                dev_err(chan2dev(&atchan->chan), "write bus error!!!");
        if (atchan->irq_status & AT_XDMAC_CIS_ROIS)
                dev_err(chan2dev(&atchan->chan), "request overflow error!!!");

        /* Channel must be disabled first as it's not done automatically */
        at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
        while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
                cpu_relax();

        bad_desc = list_first_entry(&atchan->xfers_list,
                                    struct at_xdmac_desc,
                                    xfer_node);

        /* Print bad descriptor's details if needed */
        dev_dbg(chan2dev(&atchan->chan),
                "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
                __func__, &bad_desc->lld.mbr_sa, &bad_desc->lld.mbr_da,
                bad_desc->lld.mbr_ubc);

        pm_runtime_put_autosuspend(atxdmac->dev);

        /* Then continue with usual descriptor management */
}

static void at_xdmac_tasklet(struct tasklet_struct *t)
{
        struct at_xdmac_chan    *atchan = from_tasklet(atchan, t, tasklet);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        struct at_xdmac_desc    *desc;
        struct dma_async_tx_descriptor *txd;
        u32                     error_mask;

        if (at_xdmac_chan_is_cyclic(atchan))
                return at_xdmac_handle_cyclic(atchan);

        error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS |
                AT_XDMAC_CIS_ROIS;

        spin_lock_irq(&atchan->lock);

        dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
                __func__, atchan->irq_status);

        if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) &&
            !(atchan->irq_status & error_mask)) {
                spin_unlock_irq(&atchan->lock);
                return;
        }

        if (atchan->irq_status & error_mask)
                at_xdmac_handle_error(atchan);

        desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
                                xfer_node);
        dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
        if (!desc->active_xfer) {
                dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting");
                spin_unlock_irq(&atchan->lock);
                return;
        }

        txd = &desc->tx_dma_desc;
        dma_cookie_complete(txd);
        /* Remove the transfer from the transfer list. */
        list_del(&desc->xfer_node);
        spin_unlock_irq(&atchan->lock);

        if (txd->flags & DMA_PREP_INTERRUPT)
                dmaengine_desc_get_callback_invoke(txd, NULL);

        dma_run_dependencies(txd);

        spin_lock_irq(&atchan->lock);
        /* Move the xfer descriptors into the free descriptors list. */
        list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list);
        at_xdmac_advance_work(atchan);
        spin_unlock_irq(&atchan->lock);

        /*
         * Decrement runtime PM ref counter incremented in
         * at_xdmac_start_xfer().
         */
        pm_runtime_put_autosuspend(atxdmac->dev);
}

static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
{
        struct at_xdmac         *atxdmac = (struct at_xdmac *)dev_id;
        struct at_xdmac_chan    *atchan;
        u32                     imr, status, pending;
        u32                     chan_imr, chan_status;
        int                     i, ret = IRQ_NONE;

        do {
                imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
                status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
                pending = status & imr;

                dev_vdbg(atxdmac->dma.dev,
                         "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
                         __func__, status, imr, pending);

                if (!pending)
                        break;

                /* We have to find which channel has generated the interrupt. */
                for (i = 0; i < atxdmac->dma.chancnt; i++) {
                        if (!((1 << i) & pending))
                                continue;

                        atchan = &atxdmac->chan[i];
                        chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
                        chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
                        atchan->irq_status = chan_status & chan_imr;
                        dev_vdbg(atxdmac->dma.dev,
                                 "%s: chan%d: imr=0x%x, status=0x%x\n",
                                 __func__, i, chan_imr, chan_status);
                        dev_vdbg(chan2dev(&atchan->chan),
                                 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
                                 __func__,
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CC),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
                                 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));

                        if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
                                at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);

                        tasklet_schedule(&atchan->tasklet);
                        ret = IRQ_HANDLED;
                }

        } while (pending);

        return ret;
}

static void at_xdmac_issue_pending(struct dma_chan *chan)
{
        struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
        unsigned long flags;

        dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);

        spin_lock_irqsave(&atchan->lock, flags);
        at_xdmac_advance_work(atchan);
        spin_unlock_irqrestore(&atchan->lock, flags);
}

static int at_xdmac_device_config(struct dma_chan *chan,
                                  struct dma_slave_config *config)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        int ret;
        unsigned long           flags;

        dev_dbg(chan2dev(chan), "%s\n", __func__);

        spin_lock_irqsave(&atchan->lock, flags);
        ret = at_xdmac_set_slave_config(chan, config);
        spin_unlock_irqrestore(&atchan->lock, flags);

        return ret;
}
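
/*
 * Illustrative sketch, not part of the driver: the slave configuration a
 * peripheral driver would pass in before preparing device transfers; it
 * ends up in at_xdmac_device_config() above. The FIFO address and burst
 * parameters are hypothetical.
 */
static int __maybe_unused at_xdmac_example_slave_config(struct dma_chan *chan,
                                                        dma_addr_t fifo_phys)
{
        struct dma_slave_config cfg = {
                .src_addr       = fifo_phys,
                .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
                .src_maxburst   = 8,
                .dst_addr       = fifo_phys,
                .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
                .dst_maxburst   = 8,
        };

        return dmaengine_slave_config(chan, &cfg);
}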

static void at_xdmac_device_pause_set(struct at_xdmac *atxdmac,
                                      struct at_xdmac_chan *atchan)
{
        at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask);
        while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) &
               (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
                cpu_relax();
}

static void at_xdmac_device_pause_internal(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        unsigned long           flags;

        spin_lock_irqsave(&atchan->lock, flags);
        set_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
        at_xdmac_device_pause_set(atxdmac, atchan);
        spin_unlock_irqrestore(&atchan->lock, flags);
}

static int at_xdmac_device_pause(struct dma_chan *chan)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        unsigned long           flags;
        int                     ret;

        dev_dbg(chan2dev(chan), "%s\n", __func__);

        if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
                return 0;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return ret;

        spin_lock_irqsave(&atchan->lock, flags);

        at_xdmac_device_pause_set(atxdmac, atchan);
        /* Decrement runtime PM ref counter for each active descriptor. */
        at_xdmac_runtime_suspend_descriptors(atchan);

        spin_unlock_irqrestore(&atchan->lock, flags);

        pm_runtime_put_autosuspend(atxdmac->dev);

        return 0;
}

static void at_xdmac_device_resume_internal(struct at_xdmac_chan *atchan)
{
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        unsigned long           flags;

        spin_lock_irqsave(&atchan->lock, flags);
        at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask);
        clear_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status);
        spin_unlock_irqrestore(&atchan->lock, flags);
}

static int at_xdmac_device_resume(struct dma_chan *chan)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        unsigned long           flags;
        int                     ret;

        dev_dbg(chan2dev(chan), "%s\n", __func__);

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return ret;

        spin_lock_irqsave(&atchan->lock, flags);
        if (!at_xdmac_chan_is_paused(atchan))
                goto unlock;

        /* Increment runtime PM ref counter for each active descriptor. */
        ret = at_xdmac_runtime_resume_descriptors(atchan);
        if (ret < 0)
                goto unlock;

        at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask);
        clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);

unlock:
        spin_unlock_irqrestore(&atchan->lock, flags);
        pm_runtime_put_autosuspend(atxdmac->dev);

        return ret;
}

static int at_xdmac_device_terminate_all(struct dma_chan *chan)
{
        struct at_xdmac_desc    *desc, *_desc;
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(atchan->chan.device);
        unsigned long           flags;
        int                     ret;

        dev_dbg(chan2dev(chan), "%s\n", __func__);

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return ret;

        spin_lock_irqsave(&atchan->lock, flags);
        at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
        while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
                cpu_relax();

        /* Cancel all pending transfers. */
        list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
                list_del(&desc->xfer_node);
                list_splice_tail_init(&desc->descs_list,
                                      &atchan->free_descs_list);
                /*
                 * We incremented the runtime PM reference count on
                 * at_xdmac_start_xfer() for this descriptor. Now it's time
                 * to release it.
                 */
                if (desc->active_xfer)
                        pm_runtime_put_noidle(atxdmac->dev);
        }

        clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
        clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
        spin_unlock_irqrestore(&atchan->lock, flags);

        pm_runtime_put_autosuspend(atxdmac->dev);

        return 0;
}
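
/*
 * Illustrative sketch, not part of the driver: the standard dmaengine
 * channel control calls that land in the pause, resume and terminate
 * handlers above.
 */
static void __maybe_unused at_xdmac_example_chan_control(struct dma_chan *chan)
{
        /* Suspend reads and writes on the channel (at_xdmac_device_pause()). */
        dmaengine_pause(chan);

        /* Resume a previously paused channel (at_xdmac_device_resume()). */
        dmaengine_resume(chan);

        /*
         * Abort all transfers and wait for running callbacks to finish
         * (at_xdmac_device_terminate_all()).
         */
        dmaengine_terminate_sync(chan);
}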

static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac_desc    *desc;
        int                     i;

        if (at_xdmac_chan_is_enabled(atchan)) {
                dev_err(chan2dev(chan),
                        "can't allocate channel resources (channel enabled)\n");
                return -EIO;
        }

        if (!list_empty(&atchan->free_descs_list)) {
                dev_err(chan2dev(chan),
                        "can't allocate channel resources (channel not free from a previous use)\n");
                return -EIO;
        }

        for (i = 0; i < init_nr_desc_per_channel; i++) {
                desc = at_xdmac_alloc_desc(chan, GFP_KERNEL);
                if (!desc) {
                        if (i == 0) {
                                dev_warn(chan2dev(chan),
                                         "can't allocate any descriptors\n");
                                return -EIO;
                        }
                        dev_warn(chan2dev(chan),
                                "only %d descriptors have been allocated\n", i);
                        break;
                }
                list_add_tail(&desc->desc_node, &atchan->free_descs_list);
        }

        dma_cookie_init(chan);

        dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);

        return i;
}

static void at_xdmac_free_chan_resources(struct dma_chan *chan)
{
        struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);
        struct at_xdmac         *atxdmac = to_at_xdmac(chan->device);
        struct at_xdmac_desc    *desc, *_desc;

        list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
                dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
                list_del(&desc->desc_node);
                dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
        }
}
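
/*
 * Illustrative sketch, not part of the driver: acquiring and releasing a
 * channel, which triggers at_xdmac_alloc_chan_resources() and
 * at_xdmac_free_chan_resources() above. The "tx" channel name is
 * hypothetical and would normally come from the consumer's "dmas" and
 * "dma-names" device tree properties.
 */
static int __maybe_unused at_xdmac_example_request(struct device *dev)
{
        struct dma_chan *chan;

        chan = dma_request_chan(dev, "tx");
        if (IS_ERR(chan))
                return PTR_ERR(chan);

        /* ... prepare and submit transfers here ... */

        dma_release_channel(chan);
        return 0;
}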

static void at_xdmac_axi_config(struct platform_device *pdev)
{
        struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
        bool dev_m2m = false;
        u32 dma_requests;

        if (!atxdmac->layout->axi_config)
                return; /* Not supported */

        if (!of_property_read_u32(pdev->dev.of_node, "dma-requests",
                                  &dma_requests)) {
                dev_info(&pdev->dev, "controller in mem2mem mode.\n");
                dev_m2m = true;
        }

        if (dev_m2m) {
                at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M);
                at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M);
        } else {
                at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M);
                at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M);
        }
}

static int __maybe_unused atmel_xdmac_prepare(struct device *dev)
{
        struct at_xdmac         *atxdmac = dev_get_drvdata(dev);
        struct dma_chan         *chan, *_chan;

        list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
                struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);

                /* Wait for transfer completion, except in cyclic case. */
                if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
                        return -EAGAIN;
        }
        return 0;
}

static int __maybe_unused atmel_xdmac_suspend(struct device *dev)
{
        struct at_xdmac         *atxdmac = dev_get_drvdata(dev);
        struct dma_chan         *chan, *_chan;
        int                     ret;

        ret = pm_runtime_resume_and_get(atxdmac->dev);
        if (ret < 0)
                return ret;

        list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
                struct at_xdmac_chan    *atchan = to_at_xdmac_chan(chan);

                atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC);
                if (at_xdmac_chan_is_cyclic(atchan)) {
                        if (!at_xdmac_chan_is_paused(atchan)) {
                                dev_warn(chan2dev(chan), "%s: channel %d not paused\n",
                                         __func__, chan->chan_id);
                                at_xdmac_device_pause_internal(atchan);
                                at_xdmac_runtime_suspend_descriptors(atchan);
                        }
                        atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
                        atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
                        atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
                }
        }
        atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
        atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS);

        at_xdmac_off(atxdmac, false);
        pm_runtime_mark_last_busy(atxdmac->dev);
        pm_runtime_put_noidle(atxdmac->dev);
        clk_disable_unprepare(atxdmac->clk);

        return 0;
}

static int __maybe_unused atmel_xdmac_resume(struct device *dev)
{
        struct at_xdmac         *atxdmac = dev_get_drvdata(dev);
        struct at_xdmac_chan    *atchan;
        struct dma_chan         *chan, *_chan;
        struct platform_device  *pdev = container_of(dev, struct platform_device, dev);
        int                     i, ret;

        ret = clk_prepare_enable(atxdmac->clk);
        if (ret)
                return ret;

        pm_runtime_get_noresume(atxdmac->dev);

        at_xdmac_axi_config(pdev);

        /* Clear pending interrupts. */
        for (i = 0; i < atxdmac->dma.chancnt; i++) {
                atchan = &atxdmac->chan[i];
                while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
                        cpu_relax();
        }

        at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
        list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
                atchan = to_at_xdmac_chan(chan);

                at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc);
                if (at_xdmac_chan_is_cyclic(atchan)) {
                        /*
                         * Resume only channels not explicitly paused by
                         * consumers.
                         */
                        if (at_xdmac_chan_is_paused_internal(atchan)) {
                                ret = at_xdmac_runtime_resume_descriptors(atchan);
                                if (ret < 0)
                                        return ret;
                                at_xdmac_device_resume_internal(atchan);
                        }

                        /*
                         * We may resume from a deep sleep state where power
                         * to the DMA controller is cut off. Thus, restore
                         * the suspend state of channels set through the
                         * dmaengine API.
                         */
                        else if (at_xdmac_chan_is_paused(atchan))
                                at_xdmac_device_pause_set(atxdmac, atchan);

                        at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
                        at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
                        at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
                        wmb();
                        if (atxdmac->save_gs & atchan->mask)
                                at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
                }
        }

        pm_runtime_put_autosuspend(atxdmac->dev);

        return 0;
}

static int __maybe_unused atmel_xdmac_runtime_suspend(struct device *dev)
{
        struct at_xdmac *atxdmac = dev_get_drvdata(dev);

        clk_disable(atxdmac->clk);

        return 0;
}

static int __maybe_unused atmel_xdmac_runtime_resume(struct device *dev)
{
        struct at_xdmac *atxdmac = dev_get_drvdata(dev);

        return clk_enable(atxdmac->clk);
}

static inline int at_xdmac_get_channel_number(struct platform_device *pdev,
                                              u32 reg, u32 *pchannels)
{
        int     ret;

        if (reg) {
                *pchannels = AT_XDMAC_NB_CH(reg);
                return 0;
        }

        ret = of_property_read_u32(pdev->dev.of_node, "dma-channels", pchannels);
        if (ret)
                dev_err(&pdev->dev, "can't get number of channels\n");

        return ret;
}

static int at_xdmac_probe(struct platform_device *pdev)
{
        struct at_xdmac *atxdmac;
        int             irq, ret;
        void __iomem    *base;
        u32             nr_channels, i, reg;

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;

        base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);

        /*
         * Read the number of xdmac channels. The helper function can't be
         * used since atxdmac is not yet allocated and we need to know the
         * number of channels to do the allocation.
         */
        reg = readl_relaxed(base + AT_XDMAC_GTYPE);
        ret = at_xdmac_get_channel_number(pdev, reg, &nr_channels);
        if (ret)
                return ret;

        if (nr_channels > AT_XDMAC_MAX_CHAN) {
                dev_err(&pdev->dev, "invalid number of channels (%u)\n",
                        nr_channels);
                return -EINVAL;
        }

        atxdmac = devm_kzalloc(&pdev->dev,
                               struct_size(atxdmac, chan, nr_channels),
                               GFP_KERNEL);
        if (!atxdmac) {
                dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
                return -ENOMEM;
        }

        atxdmac->regs = base;
        atxdmac->irq = irq;
        atxdmac->dev = &pdev->dev;

        atxdmac->layout = of_device_get_match_data(&pdev->dev);
        if (!atxdmac->layout)
                return -ENODEV;

        atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
        if (IS_ERR(atxdmac->clk)) {
                dev_err(&pdev->dev, "can't get dma_clk\n");
                return PTR_ERR(atxdmac->clk);
        }

        /* Do not use devres here, to prevent races with the tasklet. */
        ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
        if (ret) {
                dev_err(&pdev->dev, "can't request irq\n");
                return ret;
        }

        ret = clk_prepare_enable(atxdmac->clk);
        if (ret) {
                dev_err(&pdev->dev, "can't prepare or enable clock\n");
                goto err_free_irq;
        }

        atxdmac->at_xdmac_desc_pool =
                dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
                                sizeof(struct at_xdmac_desc), 4, 0);
        if (!atxdmac->at_xdmac_desc_pool) {
                dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
                ret = -ENOMEM;
                goto err_clk_disable;
        }

        dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
        dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
        dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
        dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask);
        dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask);
        dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
        /*
         * Without DMA_PRIVATE, the driver would not be able to allocate
         * more than one channel: the second allocation would fail in
         * private_candidate().
         */
        dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
        atxdmac->dma.dev                                = &pdev->dev;
        atxdmac->dma.device_alloc_chan_resources        = at_xdmac_alloc_chan_resources;
        atxdmac->dma.device_free_chan_resources         = at_xdmac_free_chan_resources;
        atxdmac->dma.device_tx_status                   = at_xdmac_tx_status;
        atxdmac->dma.device_issue_pending               = at_xdmac_issue_pending;
        atxdmac->dma.device_prep_dma_cyclic             = at_xdmac_prep_dma_cyclic;
        atxdmac->dma.device_prep_interleaved_dma        = at_xdmac_prep_interleaved;
        atxdmac->dma.device_prep_dma_memcpy             = at_xdmac_prep_dma_memcpy;
        atxdmac->dma.device_prep_dma_memset             = at_xdmac_prep_dma_memset;
        atxdmac->dma.device_prep_dma_memset_sg          = at_xdmac_prep_dma_memset_sg;
        atxdmac->dma.device_prep_slave_sg               = at_xdmac_prep_slave_sg;
        atxdmac->dma.device_config                      = at_xdmac_device_config;
        atxdmac->dma.device_pause                       = at_xdmac_device_pause;
        atxdmac->dma.device_resume                      = at_xdmac_device_resume;
        atxdmac->dma.device_terminate_all               = at_xdmac_device_terminate_all;
        atxdmac->dma.src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
        atxdmac->dma.dst_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
        atxdmac->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
        atxdmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;

        platform_set_drvdata(pdev, atxdmac);

        pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
        pm_runtime_use_autosuspend(&pdev->dev);
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
        pm_runtime_get_noresume(&pdev->dev);

        /* Init channels. */
        INIT_LIST_HEAD(&atxdmac->dma.channels);

        /* Disable all chans and interrupts. */
        at_xdmac_off(atxdmac, true);

        for (i = 0; i < nr_channels; i++) {
                struct at_xdmac_chan *atchan = &atxdmac->chan[i];

                atchan->chan.device = &atxdmac->dma;
                list_add_tail(&atchan->chan.device_node,
                              &atxdmac->dma.channels);

                atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
                atchan->mask = 1 << i;

                spin_lock_init(&atchan->lock);
                INIT_LIST_HEAD(&atchan->xfers_list);
                INIT_LIST_HEAD(&atchan->free_descs_list);
                tasklet_setup(&atchan->tasklet, at_xdmac_tasklet);

                /* Clear pending interrupts. */
                while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
                        cpu_relax();
        }

        ret = dma_async_device_register(&atxdmac->dma);
        if (ret) {
                dev_err(&pdev->dev, "fail to register DMA engine device\n");
                goto err_pm_disable;
        }

        ret = of_dma_controller_register(pdev->dev.of_node,
                                         at_xdmac_xlate, atxdmac);
        if (ret) {
                dev_err(&pdev->dev, "could not register of dma controller\n");
                goto err_dma_unregister;
        }

        dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
                 nr_channels, atxdmac->regs);

        at_xdmac_axi_config(pdev);

        pm_runtime_put_autosuspend(&pdev->dev);

        return 0;

err_dma_unregister:
        dma_async_device_unregister(&atxdmac->dma);
err_pm_disable:
        pm_runtime_put_noidle(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
        pm_runtime_set_suspended(&pdev->dev);
        pm_runtime_dont_use_autosuspend(&pdev->dev);
err_clk_disable:
        clk_disable_unprepare(atxdmac->clk);
err_free_irq:
        free_irq(atxdmac->irq, atxdmac);
        return ret;
}

static void at_xdmac_remove(struct platform_device *pdev)
{
        struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
        int             i;

        at_xdmac_off(atxdmac, true);
        of_dma_controller_free(pdev->dev.of_node);
        dma_async_device_unregister(&atxdmac->dma);
        pm_runtime_disable(atxdmac->dev);
        pm_runtime_set_suspended(&pdev->dev);
        pm_runtime_dont_use_autosuspend(&pdev->dev);
        clk_disable_unprepare(atxdmac->clk);

        free_irq(atxdmac->irq, atxdmac);

        for (i = 0; i < atxdmac->dma.chancnt; i++) {
                struct at_xdmac_chan *atchan = &atxdmac->chan[i];

                tasklet_kill(&atchan->tasklet);
                at_xdmac_free_chan_resources(&atchan->chan);
        }
}

static const struct dev_pm_ops __maybe_unused atmel_xdmac_dev_pm_ops = {
        .prepare        = atmel_xdmac_prepare,
        SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
        SET_RUNTIME_PM_OPS(atmel_xdmac_runtime_suspend,
                           atmel_xdmac_runtime_resume, NULL)
};

static const struct of_device_id atmel_xdmac_dt_ids[] = {
        {
                .compatible = "atmel,sama5d4-dma",
                .data = &at_xdmac_sama5d4_layout,
        }, {
                .compatible = "microchip,sama7g5-dma",
                .data = &at_xdmac_sama7g5_layout,
        }, {
                /* sentinel */
        }
};
MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);

static struct platform_driver at_xdmac_driver = {
        .probe          = at_xdmac_probe,
        .remove         = at_xdmac_remove,
        .driver = {
                .name           = "at_xdmac",
                .of_match_table = of_match_ptr(atmel_xdmac_dt_ids),
                .pm             = pm_ptr(&atmel_xdmac_dev_pm_ops),
        }
};

static int __init at_xdmac_init(void)
{
        return platform_driver_register(&at_xdmac_driver);
}
subsys_initcall(at_xdmac_init);

static void __exit at_xdmac_exit(void)
{
        platform_driver_unregister(&at_xdmac_driver);
}
module_exit(at_xdmac_exit);

MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
MODULE_LICENSE("GPL");