root/drivers/ata/sata_sx4.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  sata_sx4.c - Promise SATA
 *
 *  Maintained by:  Tejun Heo <tj@kernel.org>
 *                  Please ALWAYS copy linux-ide@vger.kernel.org
 *                  on emails.
 *
 *  Copyright 2003-2004 Red Hat, Inc.
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/driver-api/libata.rst
 *
 *  Hardware documentation available under NDA.
 */

/*
        Theory of operation
        -------------------

        The SX4 (PDC20621) chip features a single Host DMA (HDMA) copy
        engine, DIMM memory, and four ATA engines (one per SATA port).
        Data is copied to/from DIMM memory by the HDMA engine, before
        handing off to one (or more) of the ATA engines.  The ATA
        engines operate solely on DIMM memory.

        The SX4 behaves like a PATA chip, with no SATA controls or
        knowledge whatsoever, leading to the presumption that
        PATA<->SATA bridges exist on SX4 boards, external to the
        PDC20621 chip itself.

        The chip is quite capable, supporting an XOR engine and linked
        hardware commands (permits a string of transactions to be
        submitted and waited-on as a single unit), and an optional
        microprocessor.

        The limiting factor is largely software.  This Linux driver was
        written to multiplex the single HDMA engine to copy disk
        transactions into a fixed DIMM memory space, from where an ATA
        engine takes over.  As a result, each WRITE looks like this:

                submit HDMA packet to hardware
                hardware copies data from system memory to DIMM
                hardware raises interrupt

                submit ATA packet to hardware
                hardware executes ATA WRITE command, w/ data in DIMM
                hardware raises interrupt

        and each READ looks like this:

                submit ATA packet to hardware
                hardware executes ATA READ command, w/ data in DIMM
                hardware raises interrupt

                submit HDMA packet to hardware
                hardware copies data from DIMM to system memory
                hardware raises interrupt

        This is a very slow, lock-step way of doing things that can
        certainly be improved by motivated kernel hackers.

 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <linux/libata.h>
#include "sata_promise.h"

/* Driver name and version strings. */
#define DRV_NAME        "sata_sx4"
#define DRV_VERSION     "0.12"

/*
 * dimm_test - integer module parameter (mode 0644).  Per its parameter
 * description, setting it to 1 enables a DIMM test during startup.
 */
static int dimm_test;
module_param(dimm_test, int, 0644);
MODULE_PARM_DESC(dimm_test, "Enable DIMM test during startup (1 = enabled)");

/*
 * Register offsets, bit definitions and the driver-chosen DIMM memory
 * layout.  Several values are intentionally equal (e.g. PDC_PKT_SUBMIT
 * and PDC_INT_SEQMASK, or PDC_20621_GENERAL_CTL and PDC_GENERAL_CTLR);
 * they are applied to different base addresses or are alternate names
 * used by different parts of the driver.
 */
enum {
        /* PCI BARs: chip registers and the DIMM access window */
        PDC_MMIO_BAR            = 3,
        PDC_DIMM_BAR            = 4,

        PDC_PRD_TBL             = 0x44, /* Direct command DMA table addr */

        PDC_PKT_SUBMIT          = 0x40, /* Command packet pointer addr */
        PDC_HDMA_PKT_SUBMIT     = 0x100, /* Host DMA packet pointer addr */
        PDC_INT_SEQMASK         = 0x40, /* Mask of asserted SEQ INTs */
        PDC_HDMA_CTLSTAT        = 0x12C, /* Host DMA control / status */

        PDC_CTLSTAT             = 0x60, /* IDEn control / status */

        /* SEQCTL is indexed as SEQCTL + (seq * 4) by the submit paths */
        PDC_20621_SEQCTL        = 0x400,
        PDC_20621_SEQMASK       = 0x480,
        PDC_20621_GENERAL_CTL   = 0x484,
        PDC_20621_PAGE_SIZE     = (32 * 1024),

        /* chosen, not constant, values; we design our own DIMM mem map */
        PDC_20621_DIMM_WINDOW   = 0x0C, /* page# for 32K DIMM window */
        PDC_20621_DIMM_BASE     = 0x00200000,
        PDC_20621_DIMM_DATA     = (64 * 1024),
        PDC_DIMM_DATA_STEP      = (256 * 1024), /* per-port data area stride */
        PDC_DIMM_WINDOW_STEP    = (8 * 1024),   /* per-port window stride */
        /* byte offsets inside one port's window */
        PDC_DIMM_HOST_PRD       = (6 * 1024),
        PDC_DIMM_HOST_PKT       = (128 * 0),
        PDC_DIMM_HPKT_PRD       = (128 * 1),
        PDC_DIMM_ATA_PKT        = (128 * 2),
        PDC_DIMM_APKT_PRD       = (128 * 3),
        PDC_DIMM_HEADER_SZ      = PDC_DIMM_APKT_PRD + 128,
        PDC_PAGE_WINDOW         = 0x40,
        PDC_PAGE_DATA           = PDC_PAGE_WINDOW +
                                  (PDC_20621_DIMM_DATA / PDC_20621_PAGE_SIZE),
        PDC_PAGE_SET            = PDC_DIMM_DATA_STEP / PDC_20621_PAGE_SIZE,

        PDC_CHIP0_OFS           = 0xC0000, /* offset of chip #0 */

        PDC_20621_ERR_MASK      = (1<<19) | (1<<20) | (1<<21) | (1<<22) |
                                  (1<<23),

        board_20621             = 0,    /* FastTrak S150 SX4 */

        /* PDC_CTLSTAT bits */
        PDC_MASK_INT            = (1 << 10), /* HDMA/ATA mask int */
        PDC_RESET               = (1 << 11), /* HDMA/ATA reset */
        PDC_DMA_ENABLE          = (1 << 7),  /* DMA start/stop */

        /* software queue of deferred HDMA requests; size must be 2^n */
        PDC_MAX_HDMA            = 32,
        PDC_HDMA_Q_MASK         = (PDC_MAX_HDMA - 1),

        /* DIMM SPD / I2C access and SDRAM controller programming */
        PDC_DIMM0_SPD_DEV_ADDRESS       = 0x50,
        PDC_DIMM1_SPD_DEV_ADDRESS       = 0x51,
        PDC_I2C_CONTROL                 = 0x48,
        PDC_I2C_ADDR_DATA               = 0x4C,
        PDC_DIMM0_CONTROL               = 0x80,
        PDC_DIMM1_CONTROL               = 0x84,
        PDC_SDRAM_CONTROL               = 0x88,
        PDC_I2C_WRITE                   = 0,            /* master -> slave */
        PDC_I2C_READ                    = (1 << 6),     /* master <- slave */
        PDC_I2C_START                   = (1 << 7),     /* start I2C proto */
        PDC_I2C_MASK_INT                = (1 << 5),     /* mask I2C interrupt */
        PDC_I2C_COMPLETE                = (1 << 16),    /* I2C normal compl. */
        PDC_I2C_NO_ACK                  = (1 << 20),    /* slave no-ack addr */
        PDC_DIMM_SPD_SUBADDRESS_START   = 0x00,
        PDC_DIMM_SPD_SUBADDRESS_END     = 0x7F,
        PDC_DIMM_SPD_ROW_NUM            = 3,
        PDC_DIMM_SPD_COLUMN_NUM         = 4,
        PDC_DIMM_SPD_MODULE_ROW         = 5,
        PDC_DIMM_SPD_TYPE               = 11,
        PDC_DIMM_SPD_FRESH_RATE         = 12,
        PDC_DIMM_SPD_BANK_NUM           = 17,
        PDC_DIMM_SPD_CAS_LATENCY        = 18,
        PDC_DIMM_SPD_ATTRIBUTE          = 21,
        PDC_DIMM_SPD_ROW_PRE_CHARGE     = 27,
        PDC_DIMM_SPD_ROW_ACTIVE_DELAY   = 28,
        PDC_DIMM_SPD_RAS_CAS_DELAY      = 29,
        PDC_DIMM_SPD_ACTIVE_PRECHARGE   = 30,
        PDC_DIMM_SPD_SYSTEM_FREQ        = 126,
        PDC_CTL_STATUS                  = 0x08,
        PDC_DIMM_WINDOW_CTLR            = 0x0C,
        PDC_TIME_CONTROL                = 0x3C,
        PDC_TIME_PERIOD                 = 0x40,
        PDC_TIME_COUNTER                = 0x44,
        PDC_GENERAL_CTLR                = 0x484,
        PCI_PLL_INIT                    = 0x8A531824,
        PCI_X_TCOUNT                    = 0xEE1E5CFF,

        /* PDC_TIME_CONTROL bits */
        PDC_TIMER_BUZZER                = (1 << 10),
        PDC_TIMER_MODE_PERIODIC         = 0,            /* bits 9:8 == 00 */
        PDC_TIMER_MODE_ONCE             = (1 << 8),     /* bits 9:8 == 01 */
        PDC_TIMER_ENABLE                = (1 << 7),
        PDC_TIMER_MASK_INT              = (1 << 5),
        PDC_TIMER_SEQ_MASK              = 0x1f,         /* SEQ ID for timer */
        PDC_TIMER_DEFAULT               = PDC_TIMER_MODE_ONCE |
                                          PDC_TIMER_ENABLE |
                                          PDC_TIMER_MASK_INT,
};

/*
 * 128 KiB.  NOTE(review): by its name this sizes a buffer used while
 * erasing/initialising DIMM ECC; confirm against the user of this macro.
 */
#define ECC_ERASE_BUF_SZ (128 * 1024)

/*
 * Per-port private data, allocated in pdc_port_start() and stored in
 * ap->private_data.
 */
struct pdc_port_priv {
        /*
         * CPU-side staging buffer.  The first PDC_DIMM_HEADER_SZ bytes hold
         * the host/ATA packets and their single-entry S/G tables; the host
         * PRD table is built right after that.  Both parts are copied into
         * the port's DIMM window with memcpy_toio() by the prep routines.
         */
        u8                      dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512];
        /* 128-byte coherent DMA buffer and its bus address */
        u8                      *pkt;
        dma_addr_t              pkt_dma;
};

/*
 * Host-wide private data used to multiplex the single HDMA engine
 * between ports (see pdc20621_push_hdma() / pdc20621_pop_hdma()).
 */
struct pdc_host_priv {
        /* non-zero while an HDMA packet has been handed to the hardware */
        unsigned int            doing_hdma;
        /* free-running producer/consumer counters, masked on use */
        unsigned int            hdma_prod;
        unsigned int            hdma_cons;
        /*
         * Ring of HDMA requests deferred while the engine is busy.
         * NOTE(review): the literal 32 must stay equal to PDC_MAX_HDMA,
         * because slots are selected with PDC_HDMA_Q_MASK.
         */
        struct {
                struct ata_queued_cmd *qc;
                unsigned int    seq;
                unsigned long   pkt_ofs;
        } hdma[32];
};


/*
 * Forward declarations, so that the operation tables and the PCI driver
 * structure below can reference these functions ahead of their definitions.
 */
static int pdc_sata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
static void pdc_error_handler(struct ata_port *ap);
static void pdc_freeze(struct ata_port *ap);
static void pdc_thaw(struct ata_port *ap);
static int pdc_port_start(struct ata_port *ap);
static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc);
static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static unsigned int pdc20621_dimm_init(struct ata_host *host);
static int pdc20621_detect_dimm(struct ata_host *host);
static unsigned int pdc20621_i2c_read(struct ata_host *host,
                                      u32 device, u32 subaddr, u32 *pdata);
static int pdc20621_prog_dimm0(struct ata_host *host);
static unsigned int pdc20621_prog_dimm_global(struct ata_host *host);
static void pdc20621_get_from_dimm(struct ata_host *host,
                                   void *psource, u32 offset, u32 size);
static void pdc20621_put_to_dimm(struct ata_host *host,
                                 void *psource, u32 offset, u32 size);
static void pdc20621_irq_clear(struct ata_port *ap);
static unsigned int pdc20621_qc_issue(struct ata_queued_cmd *qc);
static int pdc_softreset(struct ata_link *link, unsigned int *class,
                         unsigned long deadline);
static void pdc_post_internal_cmd(struct ata_queued_cmd *qc);
static int pdc_check_atapi_dma(struct ata_queued_cmd *qc);


/*
 * SCSI host template: the libata base template for this driver name,
 * plus the scatter/gather table size and DMA boundary limits.
 */
static const struct scsi_host_template pdc_sata_sht = {
        ATA_BASE_SHT(DRV_NAME),
        .sg_tablesize           = LIBATA_MAX_PRD,
        .dma_boundary           = ATA_DMA_BOUNDARY,
};

/*
 * Port operations for the PDC20621.  Starts from the generic SFF
 * operations and overrides command preparation/issue, freeze/thaw,
 * reset, error handling, port setup and the low-level taskfile hooks.
 */
static struct ata_port_operations pdc_20621_ops = {
        .inherits               = &ata_sff_port_ops,

        .check_atapi_dma        = pdc_check_atapi_dma,
        .qc_prep                = pdc20621_qc_prep,
        .qc_issue               = pdc20621_qc_issue,

        .freeze                 = pdc_freeze,
        .thaw                   = pdc_thaw,
        .reset.softreset        = pdc_softreset,
        .error_handler          = pdc_error_handler,
        /* NOTE(review): ATA_OP_NULL presumably removes the inherited hook */
        .lost_interrupt         = ATA_OP_NULL,
        .post_internal_cmd      = pdc_post_internal_cmd,

        .port_start             = pdc_port_start,

        .sff_tf_load            = pdc_tf_load_mmio,
        .sff_exec_command       = pdc_exec_command_mmio,
        .sff_irq_clear          = pdc20621_irq_clear,
};

/*
 * Port information, indexed by board id (only board_20621 exists):
 * capability flags, supported transfer-mode masks and the operations table.
 */
static const struct ata_port_info pdc_port_info[] = {
        /* board_20621 */
        {
                .flags          = ATA_FLAG_SATA | ATA_FLAG_NO_ATAPI |
                                  ATA_FLAG_PIO_POLLING,
                .pio_mask       = ATA_PIO4,
                .mwdma_mask     = ATA_MWDMA2,
                .udma_mask      = ATA_UDMA6,
                .port_ops       = &pdc_20621_ops,
        },

};

/* PCI IDs handled by this driver; the second field is the board id. */
static const struct pci_device_id pdc_sata_pci_tbl[] = {
        { PCI_VDEVICE(PROMISE, 0x6622), board_20621 },

        { }     /* terminate list */
};

/*
 * PCI driver glue: probe is handled by pdc_sata_init_one(), removal by
 * the generic ata_pci_remove_one().
 */
static struct pci_driver pdc_sata_pci_driver = {
        .name                   = DRV_NAME,
        .id_table               = pdc_sata_pci_tbl,
        .probe                  = pdc_sata_init_one,
        .remove                 = ata_pci_remove_one,
};


/*
 * pdc_port_start - allocate the per-port private data
 * @ap: port being started
 *
 * Allocates a zeroed struct pdc_port_priv plus a 128-byte coherent DMA
 * packet buffer, both device-managed, and attaches the result to
 * @ap->private_data.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails.
 */
static int pdc_port_start(struct ata_port *ap)
{
        struct device *dev = ap->host->dev;
        struct pdc_port_priv *pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);

        /* the packet buffer is only attempted once the struct exists */
        if (pp)
                pp->pkt = dmam_alloc_coherent(dev, 128, &pp->pkt_dma,
                                              GFP_KERNEL);

        if (!pp || !pp->pkt)
                return -ENOMEM;

        ap->private_data = pp;
        return 0;
}

/*
 * pdc20621_ata_sg - write the single-entry S/G table of the ATA packet
 * @buf: start of the port's staged DIMM header
 * @portno: port number, selects the port's data area in DIMM
 * @total_len: transfer length in bytes
 *
 * The entry (address, then length with the end-of-table flag) is stored
 * little-endian at byte offset PDC_DIMM_APKT_PRD of @buf.
 */
static inline void pdc20621_ata_sg(u8 *buf, unsigned int portno,
                                   unsigned int total_len)
{
        __le32 *prd = (__le32 *) buf + (PDC_DIMM_APKT_PRD >> 2);
        u32 data_addr = PDC_20621_DIMM_BASE + PDC_20621_DIMM_DATA +
                        (PDC_DIMM_DATA_STEP * portno);

        prd[0] = cpu_to_le32(data_addr);
        prd[1] = cpu_to_le32(total_len | ATA_PRD_EOT);
}

/*
 * pdc20621_host_sg - write the DIMM-side S/G table of the Host DMA packet
 * @buf: start of the port's staged DIMM header
 * @portno: port number, selects the port's data area in DIMM
 * @total_len: transfer length in bytes
 *
 * The entry (address, then length with the end-of-table flag) is stored
 * little-endian at byte offset PDC_DIMM_HPKT_PRD of @buf.
 */
static inline void pdc20621_host_sg(u8 *buf, unsigned int portno,
                                    unsigned int total_len)
{
        __le32 *prd = (__le32 *) buf + (PDC_DIMM_HPKT_PRD >> 2);
        u32 data_addr = PDC_20621_DIMM_BASE + PDC_20621_DIMM_DATA +
                        (PDC_DIMM_DATA_STEP * portno);

        prd[0] = cpu_to_le32(data_addr);
        prd[1] = cpu_to_le32(total_len | ATA_PRD_EOT);
}

/*
 * pdc20621_ata_pkt - build the fixed leading part of an ATA packet
 * @tf: taskfile of the command
 * @devno: device number on the port (0 or non-zero)
 * @buf: start of the port's staged DIMM header
 * @portno: port number
 *
 * Starting at offset PDC_DIMM_ATA_PKT, writes the 4-byte packet header,
 * the S/G table pointer and next-packet pointer, the device-select pair
 * and the device-control pair.
 *
 * Returns the byte offset in @buf just after what was written, where the
 * caller continues with the taskfile registers.
 */
static inline unsigned int pdc20621_ata_pkt(struct ata_taskfile *tf,
                                            unsigned int devno, u8 *buf,
                                            unsigned int portno)
{
        const unsigned int prd_addr = PDC_20621_DIMM_BASE +
                                      (PDC_DIMM_WINDOW_STEP * portno) +
                                      PDC_DIMM_APKT_PRD;
        unsigned int pos = PDC_DIMM_ATA_PKT;
        unsigned int word;
        __le32 *words = (__le32 *) buf;
        u8 op, dev_reg;

        /* packet header: operation flags, reserved, seq. ids */
        if (tf->protocol == ATA_PROT_NODATA)
                op = PDC_PKT_NODATA;
        else if (tf->protocol == ATA_PROT_DMA &&
                 !(tf->flags & ATA_TFLAG_WRITE))
                op = PDC_PKT_READ;
        else
                op = 0;

        buf[pos++] = op;
        buf[pos++] = 0;                 /* reserved */
        buf[pos++] = portno + 1;        /* seq. id */
        buf[pos++] = 0xff;              /* delay seq. id */

        /* S/G table pointer (none for no-data commands), then next-pkt */
        word = pos >> 2;
        if (tf->protocol != ATA_PROT_NODATA)
                words[word] = cpu_to_le32(prd_addr);
        else
                words[word] = 0;
        words[word + 1] = 0;
        pos += 8;

        /* select device */
        dev_reg = ATA_DEVICE_OBS;
        if (devno != 0)
                dev_reg |= ATA_DEV1;

        buf[pos++] = (1 << 5) | PDC_PKT_CLEAR_BSY | ATA_REG_DEVICE;
        buf[pos++] = dev_reg;

        /* device control register */
        buf[pos++] = (1 << 5) | PDC_REG_DEVCTL;
        buf[pos++] = tf->ctl;

        return pos;
}

/*
 * pdc20621_host_pkt - build the Host DMA (HDMA) packet
 * @tf: taskfile of the command (decides the transfer direction flag)
 * @buf: start of the port's staged DIMM header
 * @portno: port number
 *
 * Writes four little-endian words at offset PDC_DIMM_HOST_PKT: the
 * control word (flags, seq. id portno + 5, delay seq. id 0xff), the DIMM
 * address of the host PRD table, the DIMM address of the DIMM-side S/G
 * table, and a zero word.
 */
static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf,
                                     unsigned int portno)
{
        const unsigned int window = PDC_20621_DIMM_BASE +
                                    (PDC_DIMM_WINDOW_STEP * portno);
        __le32 *pkt = (__le32 *) buf + (PDC_DIMM_HOST_PKT >> 2);
        u32 ctl = 0;

        if (tf->protocol == ATA_PROT_DMA && !(tf->flags & ATA_TFLAG_WRITE))
                ctl = PDC_PKT_READ;
        ctl |= ((portno + 1 + 4) << 16);        /* seq. id */
        ctl |= (0xff << 24);                    /* delay seq. id */

        pkt[0] = cpu_to_le32(ctl);
        pkt[1] = cpu_to_le32(window + PDC_DIMM_HOST_PRD);
        pkt[2] = cpu_to_le32(window + PDC_DIMM_HPKT_PRD);
        pkt[3] = 0;
}

/*
 * pdc20621_dma_prep - stage packets and S/G tables for a DMA command
 * @qc: queued command, expected to be DMA-mapped already
 *
 * Builds the host PRD table from the command's scatterlist, then the
 * Host DMA packet, the ATA packet and their single-entry S/G tables in
 * the port's staging buffer, and copies everything into the port's slice
 * of the DIMM window.  Nothing is submitted to the hardware here.
 */
static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
{
        struct scatterlist *sg;
        struct ata_port *ap = qc->ap;
        struct pdc_port_priv *pp = ap->private_data;
        void __iomem *mmio = ap->host->iomap[PDC_MMIO_BAR];
        void __iomem *dimm_mmio = ap->host->iomap[PDC_DIMM_BAR];
        unsigned int portno = ap->port_no;
        unsigned int i, si, idx, total_len = 0, sgt_len;
        /* the host PRD table is staged right after the header area */
        __le32 *buf = (__le32 *) &pp->dimm_buf[PDC_DIMM_HEADER_SZ];

        WARN_ON(!(qc->flags & ATA_QCFLAG_DMAMAP));

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        /*
         * Build S/G table
         */
        idx = 0;
        for_each_sg(qc->sg, sg, qc->n_elem, si) {
                buf[idx++] = cpu_to_le32(sg_dma_address(sg));
                buf[idx++] = cpu_to_le32(sg_dma_len(sg));
                total_len += sg_dma_len(sg);
        }
        /* flag the length word of the last entry as end-of-table */
        buf[idx - 1] |= cpu_to_le32(ATA_PRD_EOT);
        sgt_len = idx * 4;

        /*
         * Build ATA, host DMA packets
         */
        pdc20621_host_sg(&pp->dimm_buf[0], portno, total_len);
        pdc20621_host_pkt(&qc->tf, &pp->dimm_buf[0], portno);

        pdc20621_ata_sg(&pp->dimm_buf[0], portno, total_len);
        i = pdc20621_ata_pkt(&qc->tf, qc->dev->devno, &pp->dimm_buf[0], portno);

        /* append the taskfile registers, then the packet footer */
        if (qc->tf.flags & ATA_TFLAG_LBA48)
                i = pdc_prep_lba48(&qc->tf, &pp->dimm_buf[0], i);
        else
                i = pdc_prep_lba28(&qc->tf, &pp->dimm_buf[0], i);

        pdc_pkt_footer(&qc->tf, &pp->dimm_buf[0], i);

        /* copy three S/G tables and two packets to DIMM MMIO window */
        memcpy_toio(dimm_mmio + (portno * PDC_DIMM_WINDOW_STEP),
                    &pp->dimm_buf, PDC_DIMM_HEADER_SZ);
        memcpy_toio(dimm_mmio + (portno * PDC_DIMM_WINDOW_STEP) +
                    PDC_DIMM_HOST_PRD,
                    &pp->dimm_buf[PDC_DIMM_HEADER_SZ], sgt_len);

        /* force host FIFO dump */
        writel(0x00000001, mmio + PDC_20621_GENERAL_CTL);

        readl(dimm_mmio);       /* MMIO PCI posting flush */

        ata_port_dbg(ap, "ata pkt buf ofs %u, prd size %u, mmio copied\n",
                     i, sgt_len);
}

/*
 * pdc20621_nodata_prep - stage the ATA packet for a no-data command
 * @qc: queued command
 *
 * Builds only the ATA packet (no S/G tables and no Host DMA packet) in
 * the port's staging buffer and copies the header area into the port's
 * slice of the DIMM window.  Nothing is submitted to the hardware here.
 */
static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;
        struct pdc_port_priv *pp = ap->private_data;
        void __iomem *mmio = ap->host->iomap[PDC_MMIO_BAR];
        void __iomem *dimm_mmio = ap->host->iomap[PDC_DIMM_BAR];
        unsigned int portno = ap->port_no;
        unsigned int i;

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        i = pdc20621_ata_pkt(&qc->tf, qc->dev->devno, &pp->dimm_buf[0], portno);

        /* append the taskfile registers, then the packet footer */
        if (qc->tf.flags & ATA_TFLAG_LBA48)
                i = pdc_prep_lba48(&qc->tf, &pp->dimm_buf[0], i);
        else
                i = pdc_prep_lba28(&qc->tf, &pp->dimm_buf[0], i);

        pdc_pkt_footer(&qc->tf, &pp->dimm_buf[0], i);

        /*
         * copy the whole header area to the DIMM MMIO window; only the
         * ATA packet inside it was rebuilt for this command
         */
        memcpy_toio(dimm_mmio + (portno * PDC_DIMM_WINDOW_STEP),
                    &pp->dimm_buf, PDC_DIMM_HEADER_SZ);

        /* force host FIFO dump */
        writel(0x00000001, mmio + PDC_20621_GENERAL_CTL);

        readl(dimm_mmio);       /* MMIO PCI posting flush */

        ata_port_dbg(ap, "ata pkt buf ofs %u, mmio copied\n", i);
}

/*
 * pdc20621_qc_prep - prepare a queued command for issue
 * @qc: queued command
 *
 * DMA and no-data commands get their packets staged in DIMM; every other
 * protocol needs no preparation here.
 *
 * Always returns AC_ERR_OK.
 */
static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc)
{
        if (qc->tf.protocol == ATA_PROT_DMA)
                pdc20621_dma_prep(qc);
        else if (qc->tf.protocol == ATA_PROT_NODATA)
                pdc20621_nodata_prep(qc);

        return AC_ERR_OK;
}

/*
 * __pdc20621_push_hdma - hand one Host DMA packet to the hardware
 * @qc: command the packet belongs to (used to locate the host MMIO)
 * @seq: sequence id to arm
 * @pkt_ofs: DIMM address of the Host DMA packet
 *
 * Arms the sequence counter for @seq, then writes the packet address to
 * the HDMA submit register.  Each write is followed by a read-back flush.
 */
static void __pdc20621_push_hdma(struct ata_queued_cmd *qc,
                                 unsigned int seq,
                                 u32 pkt_ofs)
{
        /* hard-code chip #0 */
        void __iomem *chip = qc->ap->host->iomap[PDC_MMIO_BAR] +
                             PDC_CHIP0_OFS;
        void __iomem *seq_reg = chip + PDC_20621_SEQCTL + (seq * 4);
        void __iomem *submit_reg = chip + PDC_HDMA_PKT_SUBMIT;

        writel(0x00000001, seq_reg);
        readl(seq_reg);         /* flush */

        writel(pkt_ofs, submit_reg);
        readl(submit_reg);      /* flush */
}

/*
 * pdc20621_push_hdma - submit or queue a Host DMA request
 * @qc: command the request belongs to
 * @seq: sequence id for the request
 * @pkt_ofs: DIMM address of the Host DMA packet
 *
 * If the HDMA engine is idle the packet goes to the hardware at once and
 * the engine is marked busy.  Otherwise the request is stored in the
 * host-wide ring until pdc20621_pop_hdma() picks it up.
 */
static void pdc20621_push_hdma(struct ata_queued_cmd *qc,
                                unsigned int seq,
                                u32 pkt_ofs)
{
        struct pdc_host_priv *hpriv = qc->ap->host->private_data;

        if (hpriv->doing_hdma) {
                /* engine busy: remember the request in the next free slot */
                unsigned int slot = hpriv->hdma_prod & PDC_HDMA_Q_MASK;

                hpriv->hdma[slot].qc = qc;
                hpriv->hdma[slot].seq = seq;
                hpriv->hdma[slot].pkt_ofs = pkt_ofs;
                hpriv->hdma_prod++;
        } else {
                __pdc20621_push_hdma(qc, seq, pkt_ofs);
                hpriv->doing_hdma = 1;
        }
}

/*
 * pdc20621_pop_hdma - start the next queued Host DMA request, if any
 * @qc: command that just finished (only used to reach the host data)
 *
 * With an empty ring the HDMA engine is marked idle.  Otherwise the
 * oldest queued request is handed to the hardware and consumed.
 */
static void pdc20621_pop_hdma(struct ata_queued_cmd *qc)
{
        struct pdc_host_priv *hpriv = qc->ap->host->private_data;
        unsigned int slot;

        if (hpriv->hdma_cons != hpriv->hdma_prod) {
                slot = hpriv->hdma_cons & PDC_HDMA_Q_MASK;
                __pdc20621_push_hdma(hpriv->hdma[slot].qc,
                                     hpriv->hdma[slot].seq,
                                     hpriv->hdma[slot].pkt_ofs);
                hpriv->hdma_cons++;
                return;
        }

        /* if nothing on queue, we're done */
        hpriv->doing_hdma = 0;
}

static void pdc20621_dump_hdma(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;
        unsigned int port_no = ap->port_no;
        void __iomem *dimm_mmio = ap->host->iomap[PDC_DIMM_BAR];

        dimm_mmio += (port_no * PDC_DIMM_WINDOW_STEP);
        dimm_mmio += PDC_DIMM_HOST_PKT;

        ata_port_dbg(ap, "HDMA 0x%08X 0x%08X 0x%08X 0x%08X\n",
                     readl(dimm_mmio), readl(dimm_mmio + 4),
                     readl(dimm_mmio + 8), readl(dimm_mmio + 12));
}

static void pdc20621_packet_start(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;
        struct ata_host *host = ap->host;
        unsigned int port_no = ap->port_no;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];
        unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
        u8 seq = (u8) (port_no + 1);
        unsigned int port_ofs;

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        wmb();                  /* flush PRD, pkt writes */

        port_ofs = PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * port_no);

        /* if writing, we (1) DMA to DIMM, then (2) do ATA command */
        if (rw && qc->tf.protocol == ATA_PROT_DMA) {
                seq += 4;

                pdc20621_dump_hdma(qc);
                pdc20621_push_hdma(qc, seq, port_ofs + PDC_DIMM_HOST_PKT);
                ata_port_dbg(ap, "queued ofs 0x%x (%u), seq %u\n",
                        port_ofs + PDC_DIMM_HOST_PKT,
                        port_ofs + PDC_DIMM_HOST_PKT,
                        seq);
        } else {
                writel(0x00000001, mmio + PDC_20621_SEQCTL + (seq * 4));
                readl(mmio + PDC_20621_SEQCTL + (seq * 4));     /* flush */

                writel(port_ofs + PDC_DIMM_ATA_PKT,
                       ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
                readl(ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
                ata_port_dbg(ap, "submitted ofs 0x%x (%u), seq %u\n",
                        port_ofs + PDC_DIMM_ATA_PKT,
                        port_ofs + PDC_DIMM_ATA_PKT,
                        seq);
        }
}

/*
 * pdc20621_qc_issue - issue a queued command
 * @qc: queued command
 *
 * DMA commands and non-polled no-data commands are started through the
 * packet engine and return 0.  ATAPI DMA is a BUG().  Everything else,
 * including polled no-data commands, goes through ata_sff_qc_issue().
 */
static unsigned int pdc20621_qc_issue(struct ata_queued_cmd *qc)
{
        unsigned int proto = qc->tf.protocol;
        unsigned int polled = qc->tf.flags & ATA_TFLAG_POLLING;

        if (proto == ATA_PROT_DMA ||
            (proto == ATA_PROT_NODATA && !polled)) {
                pdc20621_packet_start(qc);
                return 0;
        }

        if (proto == ATAPI_PROT_DMA)
                BUG();

        return ata_sff_qc_issue(qc);
}

/*
 * pdc20621_host_intr - advance one port's command after a SEQ interrupt
 * @ap: port the interrupt belongs to
 * @qc: the port's active command
 * @doing_hdma: non-zero if the interrupt came from the Host DMA sequence
 *              id (the caller passes seq > 4), zero for the ATA one
 * @mmio: chip #0 register base
 *
 * DMA reads run the ATA packet first and the Host DMA copy second; DMA
 * writes run them in the opposite order.  On the first interrupt the
 * second stage is submitted, on the second the command is completed and
 * the next queued Host DMA request (if any) is started.  No-data commands
 * complete on their single interrupt.
 *
 * Returns 1 if the interrupt was consumed, 0 for any other protocol (in
 * which case it is counted as an idle IRQ).
 */
static inline unsigned int pdc20621_host_intr(struct ata_port *ap,
                                          struct ata_queued_cmd *qc,
                                          unsigned int doing_hdma,
                                          void __iomem *mmio)
{
        unsigned int port_no = ap->port_no;
        unsigned int port_ofs =
                PDC_20621_DIMM_BASE + (PDC_DIMM_WINDOW_STEP * port_no);
        u8 status;
        unsigned int handled = 0;

        if ((qc->tf.protocol == ATA_PROT_DMA) &&        /* read */
            (!(qc->tf.flags & ATA_TFLAG_WRITE))) {

                /* step two - DMA from DIMM to host */
                if (doing_hdma) {
                        ata_port_dbg(ap, "read hdma, 0x%x 0x%x\n",
                                readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
                        /* get drive status; clear intr; complete txn */
                        qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
                        ata_qc_complete(qc);
                        /* only qc->ap is read here, after completion */
                        pdc20621_pop_hdma(qc);
                }

                /* step one - exec ATA command */
                else {
                        /* Host DMA sequence ids are the ATA ones plus 4 */
                        u8 seq = (u8) (port_no + 1 + 4);
                        ata_port_dbg(ap, "read ata, 0x%x 0x%x\n",
                                readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));

                        /* submit hdma pkt */
                        pdc20621_dump_hdma(qc);
                        pdc20621_push_hdma(qc, seq,
                                           port_ofs + PDC_DIMM_HOST_PKT);
                }
                handled = 1;

        } else if (qc->tf.protocol == ATA_PROT_DMA) {   /* write */

                /* step one - DMA from host to DIMM */
                if (doing_hdma) {
                        u8 seq = (u8) (port_no + 1);
                        ata_port_dbg(ap, "write hdma, 0x%x 0x%x\n",
                                readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));

                        /* submit ata pkt */
                        writel(0x00000001, mmio + PDC_20621_SEQCTL + (seq * 4));
                        readl(mmio + PDC_20621_SEQCTL + (seq * 4));
                        writel(port_ofs + PDC_DIMM_ATA_PKT,
                               ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
                        readl(ap->ioaddr.cmd_addr + PDC_PKT_SUBMIT);
                }

                /* step two - execute ATA command */
                else {
                        ata_port_dbg(ap, "write ata, 0x%x 0x%x\n",
                                readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
                        /* get drive status; clear intr; complete txn */
                        qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
                        ata_qc_complete(qc);
                        /* only qc->ap is read here, after completion */
                        pdc20621_pop_hdma(qc);
                }
                handled = 1;

        /* command completion, but no data xfer */
        } else if (qc->tf.protocol == ATA_PROT_NODATA) {

                status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
                ata_port_dbg(ap, "BUS_NODATA (drv_stat 0x%X)\n", status);
                qc->err_mask |= ac_err_mask(status);
                ata_qc_complete(qc);
                handled = 1;

        } else {
                ap->stats.idle_irq++;
        }

        return handled;
}

static void pdc20621_irq_clear(struct ata_port *ap)
{
        ioread8(ap->ioaddr.status_addr);
}

/*
 * pdc20621_interrupt - top-level interrupt handler
 * @irq: interrupt number (unused)
 * @dev_instance: the struct ata_host registered for this interrupt
 *
 * Reads the sequence mask of chip #0 and, under the host lock, walks
 * sequence ids 1..8.  Ids 1-4 are the ATA engines of ports 0-3, ids 5-8
 * the Host DMA stage of the same ports.  Each asserted id whose port has
 * an active, non-polled command is passed to pdc20621_host_intr().
 *
 * Returns IRQ_NONE when the mask shows nothing for this device (all ones
 * or no bit in the low 16), otherwise IRQ_RETVAL() of the handled count.
 */
static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance)
{
        struct ata_host *host = dev_instance;
        void __iomem *mmio_base;
        unsigned int seq;
        unsigned int handled = 0;
        u32 mask;

        if (!host || !host->iomap[PDC_MMIO_BAR])
                return IRQ_NONE;

        /* hard-code chip #0 */
        mmio_base = host->iomap[PDC_MMIO_BAR] + PDC_CHIP0_OFS;

        /* reading should also clear interrupts */
        mask = readl(mmio_base + PDC_20621_SEQMASK);
        if (mask == 0xffffffff)
                return IRQ_NONE;

        mask &= 0xffff;         /* only 16 tags possible */
        if (mask == 0)
                return IRQ_NONE;

        spin_lock(&host->lock);

        for (seq = 1; seq <= 8; seq++) {
                /* seq 1-4 and 5-8 both map onto ports 0-3 */
                unsigned int port_no = (seq - 1) % 4;
                unsigned int tmp = mask & (1 << seq);
                struct ata_queued_cmd *qc;
                struct ata_port *ap;

                if (port_no >= host->n_ports)
                        continue;

                ap = host->ports[port_no];
                if (!ap)
                        continue;

                ata_port_dbg(ap, "seq %u, tmp %x\n", seq, tmp);
                if (!tmp)
                        continue;

                qc = ata_qc_from_tag(ap, ap->link.active_tag);
                if (qc && !(qc->tf.flags & ATA_TFLAG_POLLING))
                        handled += pdc20621_host_intr(ap, qc, (seq > 4),
                                                      mmio_base);
        }

        spin_unlock(&host->lock);

        return IRQ_RETVAL(handled);
}

static void pdc_freeze(struct ata_port *ap)
{
        void __iomem *mmio = ap->ioaddr.cmd_addr;
        u32 tmp;

        /* FIXME: if all 4 ATA engines are stopped, also stop HDMA engine */

        tmp = readl(mmio + PDC_CTLSTAT);
        tmp |= PDC_MASK_INT;
        tmp &= ~PDC_DMA_ENABLE;
        writel(tmp, mmio + PDC_CTLSTAT);
        readl(mmio + PDC_CTLSTAT); /* flush */
}

static void pdc_thaw(struct ata_port *ap)
{
        void __iomem *mmio = ap->ioaddr.cmd_addr;
        u32 tmp;

        /* FIXME: start HDMA engine, if zero ATA engines running */

        /* clear IRQ */
        ioread8(ap->ioaddr.status_addr);

        /* turn IRQ back on */
        tmp = readl(mmio + PDC_CTLSTAT);
        tmp &= ~PDC_MASK_INT;
        writel(tmp, mmio + PDC_CTLSTAT);
        readl(mmio + PDC_CTLSTAT); /* flush */
}

/*
 * pdc_reset_port - pulse the reset bit of a port's ATA engine
 * @ap: port to reset
 *
 * Tries up to 11 times to get PDC_RESET to read back as set in the
 * port's control/status register, waiting 100us before each write.  The
 * bit is then cleared again and the write is flushed with a read-back.
 */
static void pdc_reset_port(struct ata_port *ap)
{
        void __iomem *mmio = ap->ioaddr.cmd_addr + PDC_CTLSTAT;
        unsigned int i;
        u32 tmp;

        /* FIXME: handle HDMA copy engine */

        for (i = 11; i > 0; i--) {
                tmp = readl(mmio);
                /* stop as soon as the reset bit is seen as set */
                if (tmp & PDC_RESET)
                        break;

                udelay(100);

                tmp |= PDC_RESET;
                writel(tmp, mmio);
        }

        /* tmp has PDC_RESET set on either loop exit; release the reset */
        tmp &= ~PDC_RESET;
        writel(tmp, mmio);
        readl(mmio);    /* flush */
}

/*
 * pdc_softreset - softreset hook
 * @link: link to reset
 * @class: resulting device class(es), filled in by the SFF softreset
 * @deadline: deadline passed through to the SFF softreset
 *
 * Resets the port's ATA engine first, then performs the standard SFF
 * softreset and returns its result.
 */
static int pdc_softreset(struct ata_link *link, unsigned int *class,
                         unsigned long deadline)
{
        struct ata_port *ap = link->ap;

        pdc_reset_port(ap);

        return ata_sff_softreset(link, class, deadline);
}

/*
 * pdc_error_handler - error handler hook
 * @ap: port to recover
 *
 * Resets the port's ATA engine unless the port is frozen, then runs the
 * standard SFF error handler.
 */
static void pdc_error_handler(struct ata_port *ap)
{
        const int frozen = ata_port_is_frozen(ap);

        if (frozen == 0)
                pdc_reset_port(ap);

        ata_sff_error_handler(ap);
}

/*
 * pdc_post_internal_cmd - clean up after an internal command
 * @qc: the internal command that finished
 *
 * Commands flagged ATA_QCFLAG_EH trigger a port reset so that the DMA
 * engine forgets about the failed command; others need nothing.
 */
static void pdc_post_internal_cmd(struct ata_queued_cmd *qc)
{
        if (!(qc->flags & ATA_QCFLAG_EH))
                return;

        /* make DMA engine forget about the failed command */
        pdc_reset_port(qc->ap);
}

/*
 * pdc_check_atapi_dma - decide whether an ATAPI command may use DMA
 * @qc: queued command; its SCSI CDB is inspected
 *
 * Only a whitelist of read/write opcodes may use DMA.  A WRITE(10) whose
 * LBA lies in 0xFFFF4FA2..0xFFFFFFFF (-45150..-1) must use PIO as well.
 *
 * Returns 0 if DMA may be used, 1 if the command must use PIO.
 */
static int pdc_check_atapi_dma(struct ata_queued_cmd *qc)
{
        u8 *scsicmd = qc->scsicmd->cmnd;
        int pio = 1; /* atapi dma off by default */

        /* Whitelist commands that may use DMA. */
        switch (scsicmd[0]) {
        case WRITE_12:
        case WRITE_10:
        case WRITE_6:
        case READ_12:
        case READ_10:
        case READ_6:
        case 0xad: /* READ_DVD_STRUCTURE */
        case 0xbe: /* READ_CD */
                pio = 0;
                break;
        default:
                break;
        }
        /* -45150 (FFFF4FA2) to -1 (FFFFFFFF) shall use PIO mode */
        if (scsicmd[0] == WRITE_10) {
                /*
                 * Widen each byte to unsigned before shifting: a plain
                 * u8 promotes to signed int, and shifting a value >= 0x80
                 * left by 24 would overflow into the sign bit.
                 */
                unsigned int lba =
                        ((unsigned int) scsicmd[2] << 24) |
                        ((unsigned int) scsicmd[3] << 16) |
                        ((unsigned int) scsicmd[4] << 8) |
                        (unsigned int) scsicmd[5];
                if (lba >= 0xFFFF4FA2)
                        pio = 1;
        }
        return pio;
}

/*
 * pdc_tf_load_mmio - sff_tf_load hook
 * @ap: port to load the taskfile into
 * @tf: taskfile to load
 *
 * Warns if called for a DMA or ATAPI DMA taskfile, then performs the
 * standard SFF taskfile load regardless.
 */
static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf)
{
        const unsigned int proto = tf->protocol;

        WARN_ON(proto == ATA_PROT_DMA || proto == ATAPI_PROT_DMA);

        ata_sff_tf_load(ap, tf);
}


/*
 * pdc_exec_command_mmio - sff_exec_command hook
 * @ap: port to issue the command on
 * @tf: taskfile holding the command
 *
 * Warns if called for a DMA or ATAPI DMA taskfile, then performs the
 * standard SFF command execution regardless.
 */
static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf)
{
        const unsigned int proto = tf->protocol;

        WARN_ON(proto == ATA_PROT_DMA || proto == ATAPI_PROT_DMA);

        ata_sff_exec_command(ap, tf);
}


/*
 * Populate the register addresses of one port.  Every register sits at a
 * fixed offset from @base; feature/error, command/status and
 * altstatus/ctl each share a single address.
 */
static void pdc_sata_setup_port(struct ata_ioports *port, void __iomem *base)
{
        void __iomem *err_feature = base + 0x4;
        void __iomem *cmd_status = base + 0x1c;
        void __iomem *ctl_altstatus = base + 0x38;

        port->cmd_addr = base;
        port->data_addr = base;

        port->error_addr = err_feature;
        port->feature_addr = err_feature;

        port->nsect_addr = base + 0x8;
        port->lbal_addr = base + 0xc;
        port->lbam_addr = base + 0x10;
        port->lbah_addr = base + 0x14;
        port->device_addr = base + 0x18;

        port->status_addr = cmd_status;
        port->command_addr = cmd_status;

        port->ctl_addr = ctl_altstatus;
        port->altstatus_addr = ctl_altstatus;
}


/*
 * pdc20621_get_from_dimm - copy data out of the DIMM through the window
 * @host: ATA host; the MMIO and DIMM BARs are taken from host->iomap
 * @psource: destination buffer in system memory (despite the name)
 * @offset: starting offset into the DIMM
 * @size: amount to transfer
 *
 * The DIMM is reached through a window of window_size (0x2000 * 4) bytes.
 * The window index is written to PDC_DIMM_WINDOW_CTLR before each copy and
 * incremented as the transfer crosses into the next window.  The copy is
 * done in three phases: the remainder of the first window, any number of
 * whole windows, and a final tail.
 *
 * NOTE(review): the first copy adds "offset / 4" to a void __iomem pointer
 * (byte-stride arithmetic), and the later copies pass "window_size / 4" and
 * "size / 4" as lengths while psource/size advance by the full window_size.
 * This looks like a leftover of dword-based arithmetic -- confirm against
 * the hardware documentation before relying on multi-window transfers.
 */
static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
                                   u32 offset, u32 size)
{
        u32 window_size;
        u16 idx;
        u8 page_mask;
        long dist;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];
        void __iomem *dimm_mmio = host->iomap[PDC_DIMM_BAR];

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        page_mask = 0x00;
        window_size = 0x2000 * 4; /* 32K byte uchar size */
        /* index of the window that contains the starting offset */
        idx = (u16) (offset / window_size);

        /* each register write is followed by a read-back of the same register */
        writel(0x01, mmio + PDC_GENERAL_CTLR);
        readl(mmio + PDC_GENERAL_CTLR);
        writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
        readl(mmio + PDC_DIMM_WINDOW_CTLR);

        /* phase 1: from the offset inside the first window up to its end */
        offset -= (idx * window_size);
        idx++;
        dist = min(size, window_size - offset);
        memcpy_fromio(psource, dimm_mmio + offset / 4, dist);

        psource += dist;
        size -= dist;
        /* phase 2: one iteration per remaining whole window */
        for (; (long) size >= (long) window_size ;) {
                writel(0x01, mmio + PDC_GENERAL_CTLR);
                readl(mmio + PDC_GENERAL_CTLR);
                writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                readl(mmio + PDC_DIMM_WINDOW_CTLR);
                memcpy_fromio(psource, dimm_mmio, window_size / 4);
                psource += window_size;
                size -= window_size;
                idx++;
        }

        /* phase 3: whatever is left, starting at the base of the next window */
        if (size) {
                writel(0x01, mmio + PDC_GENERAL_CTLR);
                readl(mmio + PDC_GENERAL_CTLR);
                writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                readl(mmio + PDC_DIMM_WINDOW_CTLR);
                memcpy_fromio(psource, dimm_mmio, size / 4);
        }
}


/*
 * pdc20621_put_to_dimm - copy data into the DIMM through the window
 * @host: ATA host; the MMIO and DIMM BARs are taken from host->iomap
 * @psource: source buffer in system memory
 * @offset: starting offset into the DIMM
 * @size: amount to transfer
 *
 * Mirror image of pdc20621_get_from_dimm(): the window index is written to
 * PDC_DIMM_WINDOW_CTLR, the data is copied with memcpy_toio(), and only
 * then 0x01 is written to PDC_GENERAL_CTLR (the read path writes that
 * register before copying).  The copy runs in three phases: the remainder
 * of the first window, any number of whole windows, and a final tail.
 *
 * NOTE(review): the first copy adds "offset / 4" to a void __iomem pointer
 * (byte-stride arithmetic), and the later copies pass "window_size / 4" and
 * "size / 4" as lengths while psource/size advance by the full window_size.
 * This looks like a leftover of dword-based arithmetic -- confirm against
 * the hardware documentation before relying on multi-window transfers.
 */
static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
                                 u32 offset, u32 size)
{
        u32 window_size;
        u16 idx;
        u8 page_mask;
        long dist;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];
        void __iomem *dimm_mmio = host->iomap[PDC_DIMM_BAR];

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        page_mask = 0x00;
        window_size = 0x2000 * 4;       /* 32K byte uchar size */
        /* index of the window that contains the starting offset */
        idx = (u16) (offset / window_size);

        /* phase 1: from the offset inside the first window up to its end */
        writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
        readl(mmio + PDC_DIMM_WINDOW_CTLR);
        offset -= (idx * window_size);
        idx++;
        dist = min(size, window_size - offset);
        memcpy_toio(dimm_mmio + offset / 4, psource, dist);
        writel(0x01, mmio + PDC_GENERAL_CTLR);
        readl(mmio + PDC_GENERAL_CTLR);

        psource += dist;
        size -= dist;
        /* phase 2: one iteration per remaining whole window */
        for (; (long) size >= (long) window_size ;) {
                writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                readl(mmio + PDC_DIMM_WINDOW_CTLR);
                memcpy_toio(dimm_mmio, psource, window_size / 4);
                writel(0x01, mmio + PDC_GENERAL_CTLR);
                readl(mmio + PDC_GENERAL_CTLR);
                psource += window_size;
                size -= window_size;
                idx++;
        }

        /* phase 3: whatever is left, starting at the base of the next window */
        if (size) {
                writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR);
                readl(mmio + PDC_DIMM_WINDOW_CTLR);
                memcpy_toio(dimm_mmio, psource, size / 4);
                writel(0x01, mmio + PDC_GENERAL_CTLR);
                readl(mmio + PDC_GENERAL_CTLR);
        }
}


/*
 * pdc20621_i2c_read - read one byte over the chip's I2C interface
 * @host: ATA host providing the MMIO BAR
 * @device: I2C device address, placed in bits 31:24 of the address register
 * @subaddr: sub-address, placed in bits 23:16 of the address register
 * @pdata: receives bits 15:8 of the address/data register on success
 *
 * Starts a masked-interrupt read and polls the control register up to
 * 1001 times for PDC_I2C_COMPLETE.
 *
 * Returns 1 on success, 0 if the transfer never completed (in which case
 * *pdata is left untouched).
 */
static unsigned int pdc20621_i2c_read(struct ata_host *host, u32 device,
                                      u32 subaddr, u32 *pdata)
{
        /* hard-code chip #0 */
        void __iomem *chip = host->iomap[PDC_MMIO_BAR] + PDC_CHIP0_OFS;
        u32 addr_word = (device << 24) | (subaddr << 16);
        u32 ctl, value;
        u32 tries;

        /* Set the device and subaddress */
        writel(addr_word, chip + PDC_I2C_ADDR_DATA);
        readl(chip + PDC_I2C_ADDR_DATA);

        /* Write Control to perform read operation, mask int */
        writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT,
               chip + PDC_I2C_CONTROL);

        /* Poll for completion; give up after the 1001st unsuccessful read. */
        for (tries = 0; ; tries++) {
                ctl = readl(chip + PDC_I2C_CONTROL);
                if (ctl & PDC_I2C_COMPLETE)
                        break;
                if (tries == 1000)
                        return 0;
        }

        value = readl(chip + PDC_I2C_ADDR_DATA);
        *pdata = (value >> 8) & 0x000000ff;

        return 1;
}


/*
 * pdc20621_detect_dimm - classify the DIMM speed from its SPD data
 * @host: ATA host used for the I2C reads
 *
 * Returns 100 if the PDC_DIMM_SPD_SYSTEM_FREQ byte reads 100, 133 if SPD
 * byte 9 is <= 0x75, and 0 otherwise (including any I2C read failure).
 */
static int pdc20621_detect_dimm(struct ata_host *host)
{
        u32 spd = 0;

        if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
                               PDC_DIMM_SPD_SYSTEM_FREQ, &spd))
                return 0;

        if (spd == 100)
                return 100;

        if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, 9, &spd))
                return 0;

        return (spd <= 0x75) ? 133 : 0;
}


/*
 * pdc20621_prog_dimm0 - program the DIMM0 control register from SPD data
 * @host: ATA host providing the MMIO BAR and used for the I2C reads
 *
 * Reads a fixed set of SPD bytes over I2C, packs them into a single
 * control word and writes it to PDC_DIMM0_CONTROL.
 *
 * Returns the computed DIMM size in MB, or -EIO if any I2C read fails.
 */
static int pdc20621_prog_dimm0(struct ata_host *host)
{
        u32 spd0[50];
        u32 data = 0;
        int size, i;
        u8 bdimmsize;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];
        /*
         * Each entry names the SPD register to read (reg) and the slot in
         * spd0[] that receives the value (ofs).  Only these slots of spd0[]
         * are ever initialised; the code below reads no others.
         */
        static const struct {
                unsigned int reg;
                unsigned int ofs;
        } pdc_i2c_read_data [] = {
                { PDC_DIMM_SPD_TYPE, 11 },
                { PDC_DIMM_SPD_FRESH_RATE, 12 },
                { PDC_DIMM_SPD_COLUMN_NUM, 4 },
                { PDC_DIMM_SPD_ATTRIBUTE, 21 },
                { PDC_DIMM_SPD_ROW_NUM, 3 },
                { PDC_DIMM_SPD_BANK_NUM, 17 },
                { PDC_DIMM_SPD_MODULE_ROW, 5 },
                { PDC_DIMM_SPD_ROW_PRE_CHARGE, 27 },
                { PDC_DIMM_SPD_ROW_ACTIVE_DELAY, 28 },
                { PDC_DIMM_SPD_RAS_CAS_DELAY, 29 },
                { PDC_DIMM_SPD_ACTIVE_PRECHARGE, 30 },
                { PDC_DIMM_SPD_CAS_LATENCY, 18 },
        };

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        /* fetch every SPD byte listed above; abort on the first failure */
        for (i = 0; i < ARRAY_SIZE(pdc_i2c_read_data); i++)
                if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
                                       pdc_i2c_read_data[i].reg,
                                       &spd0[pdc_i2c_read_data[i].ofs])) {
                        dev_err(host->dev,
                                "Failed in i2c read at index %d: device=%#x, reg=%#x\n",
                                i, PDC_DIMM0_SPD_DEV_ADDRESS, pdc_i2c_read_data[i].reg);
                        return -EIO;
                }

        /*
         * Pack the control word: column count - 8 in the low bits, attribute
         * flag at bit 3, row count - 11 from bit 4, bank/module-row terms at
         * bits 6 and 7, then the timing values (rounded up to units of 10)
         * from bits 8, 10 and 12.
         */
        data |= (spd0[4] - 8) | ((spd0[21] != 0) << 3) | ((spd0[3]-11) << 4);
        data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) |
                ((((spd0[27] + 9) / 10) - 1) << 8) ;
        data |= (((((spd0[29] > spd0[28])
                    ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10;
        data |= ((spd0[30] - spd0[29] + 9) / 10 - 2) << 12;

        /* CAS latency field at bits 15:14, chosen from the highest bit set */
        if (spd0[18] & 0x08)
                data |= ((0x03) << 14);
        else if (spd0[18] & 0x04)
                data |= ((0x02) << 14);
        else if (spd0[18] & 0x01)
                data |= ((0x01) << 14);
        else
                data |= (0 << 14);

        /*
           Calculate the size of bDIMMSize (power of 2) and
           merge the DIMM size by program start/end address.
        */

        /*
         * NOTE(review): bdimmsize comes straight from SPD bytes and is not
         * range-checked.  A value >= 31 overflows the int shift below, and
         * a value < 24 gives size < 16 so that "(size / 16) - 1" goes
         * negative -- consider validating before use.
         */
        bdimmsize = spd0[4] + (spd0[5] / 2) + spd0[3] + (spd0[17] / 2) + 3;
        size = (1 << bdimmsize) >> 20;  /* size = xxx(MB) */
        data |= (((size / 16) - 1) << 16);
        data |= (0 << 23);
        data |= 8;
        writel(data, mmio + PDC_DIMM0_CONTROL);
        readl(mmio + PDC_DIMM0_CONTROL);
        return size;
}


/*
 * pdc20621_prog_dimm_global - program the DIMM global control register
 * @host: ATA host providing the MMIO BAR and used for the I2C read
 *
 * Writes the default value to PDC_SDRAM_CONTROL, sets bit 16 when the SPD
 * type byte reads 0x02 (ECC), then starts DIMM initialisation by setting
 * bit 19 and polls until the hardware clears it again.
 *
 * Returns 0 on success, 1 if the SPD read fails or bit 19 never clears.
 */
static unsigned int pdc20621_prog_dimm_global(struct ata_host *host)
{
        u32 data, spd0;
        int error, i;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        /*
          Set To Default : DIMM Module Global Control Register (0x022259F1)
          DIMM Arbitration Disable (bit 20)
          DIMM Data/Control Output Driving Selection (bit12 - bit15)
          Refresh Enable (bit 17)
        */

        data = 0x022259F1;
        writel(data, mmio + PDC_SDRAM_CONTROL);
        readl(mmio + PDC_SDRAM_CONTROL);

        /* Turn on for ECC */
        if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
                               PDC_DIMM_SPD_TYPE, &spd0)) {
                dev_err(host->dev,
                        "Failed in i2c read: device=%#x, subaddr=%#x\n",
                        PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE);
                return 1;
        }
        if (spd0 == 0x02) {
                data |= (0x01 << 16);
                writel(data, mmio + PDC_SDRAM_CONTROL);
                readl(mmio + PDC_SDRAM_CONTROL);
                /* Status report, not a failure: log at info level. */
                dev_info(host->dev, "Local DIMM ECC Enabled\n");
        }

        /* DIMM Initialization Select/Enable (bit 18/19) */
        data &= (~(1<<18));
        data |= (1<<19);
        writel(data, mmio + PDC_SDRAM_CONTROL);

        /*
         * Wait for the hardware to clear bit 19.  The sleep grows by 100 ms
         * per attempt (100 + 200 + ... + 1000 ms), about 5 seconds in total.
         */
        error = 1;
        for (i = 1; i <= 10; i++) {   /* polling ~5 secs */
                data = readl(mmio + PDC_SDRAM_CONTROL);
                if (!(data & (1<<19))) {
                        error = 0;
                        break;
                }
                msleep(i*100);
        }
        return error;
}


/*
 * pdc20621_dimm_init - bring up the on-board DIMM
 * @host: ATA host providing the MMIO/DIMM BARs
 *
 * Steps, in order:
 *   1. run the chip timer for 3 seconds and derive the PLL setting from
 *      the number of ticks counted (or use PCI_PLL_INIT when the counter
 *      dropped below PCI_X_TCOUNT);
 *   2. read the DIMM SPD over I2C and program the DIMM0 and global
 *      control registers;
 *   3. if the dimm_test option is set, write/read a test pattern and log
 *      the result;
 *   4. if the SPD type byte reads 0x02 (ECC), zero-fill the whole DIMM.
 *
 * Returns 0 on success and a non-zero value on failure (either 1, or the
 * negative errno from pdc20621_prog_dimm0() converted to unsigned).
 */
static unsigned int pdc20621_dimm_init(struct ata_host *host)
{
        int speed, size, length;
        u32 addr, spd0, pci_status;
        u32 time_period = 0;
        u32 tcount = 0;
        u32 ticks = 0;
        u32 clock = 0;
        u32 fparam = 0;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        /* Initialize PLL based upon PCI Bus Frequency */

        /* Initialize Time Period Register */
        writel(0xffffffff, mmio + PDC_TIME_PERIOD);
        time_period = readl(mmio + PDC_TIME_PERIOD);
        dev_dbg(host->dev, "Time Period Register (0x40): 0x%x\n", time_period);

        /* Enable timer */
        writel(PDC_TIMER_DEFAULT, mmio + PDC_TIME_CONTROL);
        readl(mmio + PDC_TIME_CONTROL);

        /* Wait 3 seconds */
        msleep(3000);

        /*
           When timer is enabled, counter is decreased every internal
           clock cycle.
        */

        tcount = readl(mmio + PDC_TIME_COUNTER);
        dev_dbg(host->dev, "Time Counter Register (0x44): 0x%x\n", tcount);

        /*
           If SX4 is on PCI-X bus, after 3 seconds, the timer counter
           register should be >= (0xffffffff - 3x10^8).
        */
        if (tcount >= PCI_X_TCOUNT) {
                ticks = (time_period - tcount);
                dev_dbg(host->dev, "Num counters 0x%x (%d)\n", ticks, ticks);

                clock = (ticks / 300000);
                dev_dbg(host->dev, "10 * Internal clk = 0x%x (%d)\n",
                        clock, clock);

                /*
                 * A timer that did not run (or barely advanced) yields
                 * clock == 0, which would divide by zero below.  Treat it
                 * as a hardware failure instead.
                 */
                if (!clock) {
                        dev_err(host->dev,
                                "Timer advanced only %u ticks, cannot derive PLL parameters\n",
                                ticks);
                        return 1;
                }

                clock = (clock * 33);
                dev_dbg(host->dev, "10 * Internal clk * 33 = 0x%x (%d)\n",
                        clock, clock);

                /* PLL F Param (bit 22:16) */
                fparam = (1400000 / clock) - 2;
                dev_dbg(host->dev, "PLL F Param: 0x%x (%d)\n", fparam, fparam);

                /* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */
                pci_status = (0x8a001824 | (fparam << 16));
        } else
                pci_status = PCI_PLL_INIT;

        /* Initialize PLL. */
        dev_dbg(host->dev, "pci_status: 0x%x\n", pci_status);
        writel(pci_status, mmio + PDC_CTL_STATUS);
        readl(mmio + PDC_CTL_STATUS);

        /*
           Read SPD of DIMM by I2C interface,
           and program the DIMM Module Controller.
        */
        speed = pdc20621_detect_dimm(host);
        if (!speed) {
                dev_err(host->dev, "Detect Local DIMM Fail\n");
                return 1;       /* DIMM error */
        }
        dev_dbg(host->dev, "Local DIMM Speed = %d\n", speed);

        /* Programming DIMM0 Module Control Register (index_CID0:80h) */
        size = pdc20621_prog_dimm0(host);
        if (size < 0)
                return size;
        dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size);

        /* Programming DIMM Module Global Control Register (index_CID0:88h) */
        if (pdc20621_prog_dimm_global(host)) {
                dev_err(host->dev,
                        "Programming DIMM Module Global Control Register Fail\n");
                return 1;
        }

        if (dimm_test) {
                u8 test_pattern1[40] =
                        {0x55,0xAA,'P','r','o','m','i','s','e',' ',
                        'N','o','t',' ','Y','e','t',' ',
                        'D','e','f','i','n','e','d',' ',
                        '1','.','1','0',
                        '9','8','0','3','1','6','1','2',0,0};
                u8 test_pattern2[40] = {0};

                /* clear both test locations first */
                pdc20621_put_to_dimm(host, test_pattern2, 0x10040, 40);
                pdc20621_put_to_dimm(host, test_pattern2, 0x40, 40);

                /*
                 * The text read back is printed with a precision of 38,
                 * the number of bytes left in test_pattern2 after the two
                 * leading marker bytes, so that a faulty DIMM returning
                 * data without a NUL cannot make the print run off the
                 * end of the buffer.
                 */
                pdc20621_put_to_dimm(host, test_pattern1, 0x10040, 40);
                pdc20621_get_from_dimm(host, test_pattern2, 0x40, 40);
                dev_info(host->dev, "DIMM test pattern 1: %x, %x, %.38s\n", test_pattern2[0],
                       test_pattern2[1], &(test_pattern2[2]));
                pdc20621_get_from_dimm(host, test_pattern2, 0x10040,
                                       40);
                dev_info(host->dev, "DIMM test pattern 2: %x, %x, %.38s\n",
                         test_pattern2[0],
                         test_pattern2[1], &(test_pattern2[2]));

                pdc20621_put_to_dimm(host, test_pattern1, 0x40, 40);
                pdc20621_get_from_dimm(host, test_pattern2, 0x40, 40);
                dev_info(host->dev, "DIMM test pattern 3: %x, %x, %.38s\n",
                         test_pattern2[0],
                         test_pattern2[1], &(test_pattern2[2]));
        }

        /* ECC initialization. */

        if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
                               PDC_DIMM_SPD_TYPE, &spd0)) {
                dev_err(host->dev,
                        "Failed in i2c read: device=%#x, subaddr=%#x\n",
                       PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE);
                return 1;
        }
        if (spd0 == 0x02) {
                void *buf;
                dev_dbg(host->dev, "Start ECC initialization\n");
                addr = 0;
                length = size * 1024 * 1024;
                buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL);
                if (!buf)
                        return 1;
                /* write the zeroed buffer over the DIMM, one chunk at a time */
                while (addr < length) {
                        pdc20621_put_to_dimm(host, buf, addr,
                                             ECC_ERASE_BUF_SZ);
                        addr += ECC_ERASE_BUF_SZ;
                }
                kfree(buf);
                dev_dbg(host->dev, "Finish ECC initialization\n");
        }
        return 0;
}


static void pdc_20621_init(struct ata_host *host)
{
        u32 tmp;
        void __iomem *mmio = host->iomap[PDC_MMIO_BAR];

        /* hard-code chip #0 */
        mmio += PDC_CHIP0_OFS;

        /*
         * Select page 0x40 for our 32k DIMM window
         */
        tmp = readl(mmio + PDC_20621_DIMM_WINDOW) & 0xffff0000;
        tmp |= PDC_PAGE_WINDOW; /* page 40h; arbitrarily selected */
        writel(tmp, mmio + PDC_20621_DIMM_WINDOW);

        /*
         * Reset Host DMA
         */
        tmp = readl(mmio + PDC_HDMA_CTLSTAT);
        tmp |= PDC_RESET;
        writel(tmp, mmio + PDC_HDMA_CTLSTAT);
        readl(mmio + PDC_HDMA_CTLSTAT);         /* flush */

        udelay(10);

        tmp = readl(mmio + PDC_HDMA_CTLSTAT);
        tmp &= ~PDC_RESET;
        writel(tmp, mmio + PDC_HDMA_CTLSTAT);
        readl(mmio + PDC_HDMA_CTLSTAT);         /* flush */
}

static int pdc_sata_init_one(struct pci_dev *pdev,
                             const struct pci_device_id *ent)
{
        const struct ata_port_info *ppi[] =
                { &pdc_port_info[ent->driver_data], NULL };
        struct ata_host *host;
        struct pdc_host_priv *hpriv;
        int i, rc;

        ata_print_version_once(&pdev->dev, DRV_VERSION);

        /* allocate host */
        host = ata_host_alloc_pinfo(&pdev->dev, ppi, 4);
        hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL);
        if (!host || !hpriv)
                return -ENOMEM;

        host->private_data = hpriv;

        /* acquire resources and fill host */
        rc = pcim_enable_device(pdev);
        if (rc)
                return rc;

        rc = pcim_iomap_regions(pdev, (1 << PDC_MMIO_BAR) | (1 << PDC_DIMM_BAR),
                                DRV_NAME);
        if (rc == -EBUSY)
                pcim_pin_device(pdev);
        if (rc)
                return rc;
        host->iomap = pcim_iomap_table(pdev);

        for (i = 0; i < 4; i++) {
                struct ata_port *ap = host->ports[i];
                void __iomem *base = host->iomap[PDC_MMIO_BAR] + PDC_CHIP0_OFS;
                unsigned int offset = 0x200 + i * 0x80;

                pdc_sata_setup_port(&ap->ioaddr, base + offset);

                ata_port_pbar_desc(ap, PDC_MMIO_BAR, -1, "mmio");
                ata_port_pbar_desc(ap, PDC_DIMM_BAR, -1, "dimm");
                ata_port_pbar_desc(ap, PDC_MMIO_BAR, offset, "port");
        }

        /* configure and activate */
        rc = dma_set_mask_and_coherent(&pdev->dev, ATA_DMA_MASK);
        if (rc)
                return rc;

        if (pdc20621_dimm_init(host))
                return -ENOMEM;
        pdc_20621_init(host);

        pci_set_master(pdev);
        return ata_host_activate(host, pdev->irq, pdc20621_interrupt,
                                 IRQF_SHARED, &pdc_sata_sht);
}

/* Register pdc_sata_pci_driver as this module's PCI driver. */
module_pci_driver(pdc_sata_pci_driver);

/* Module metadata; the device table exports pdc_sata_pci_tbl for PCI matching. */
MODULE_AUTHOR("Jeff Garzik");
MODULE_DESCRIPTION("Promise SATA low-level driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, pdc_sata_pci_tbl);
MODULE_VERSION(DRV_VERSION);