root/drivers/crypto/ccp/ccp-dev-v5.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2016,2019 Advanced Micro Devices, Inc.
 *
 * Author: Gary R Hook <gary.hook@amd.com>
 */

#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/ccp.h>

#include "ccp-dev.h"

/* Reserve @count contiguous LSB slots for @cmd_q.
 *
 * The queue's private LSB map is tried first, but only when a private LSB
 * has been assigned (cmd_q->lsb >= 0). Otherwise, or when the private map
 * is full, the device-wide shared map is searched under sb_mutex. When the
 * shared map has no room either, the caller sleeps on sb_queue until
 * sb_avail is set and then retries.
 *
 * Return: the first slot number, or 0 when the interruptible wait returns
 * a non-zero value.
 */
static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
{
        struct ccp_device *ccp = cmd_q->ccp;
        int slot;

        /* Private region: the map is indexed relative to this queue's LSB */
        if (cmd_q->lsb >= 0) {
                slot = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap, LSB_SIZE,
                                                       0, count, 0);
                if (slot < LSB_SIZE) {
                        bitmap_set(cmd_q->lsbmap, slot, count);
                        return cmd_q->lsb * LSB_SIZE + slot;
                }
        }

        /* Shared region: retry each time the wait completes normally */
        do {
                mutex_lock(&ccp->sb_mutex);

                slot = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
                                                       MAX_LSB_CNT * LSB_SIZE,
                                                       0, count, 0);
                if (slot <= MAX_LSB_CNT * LSB_SIZE) {
                        bitmap_set(ccp->lsbmap, slot, count);
                        mutex_unlock(&ccp->sb_mutex);
                        return slot;
                }

                /* Nothing free: clear the flag so the wait below blocks */
                ccp->sb_avail = 0;
                mutex_unlock(&ccp->sb_mutex);
        } while (!wait_event_interruptible(ccp->sb_queue, ccp->sb_avail));

        /* The wait did not complete normally */
        return 0;
}

/* Free a number of LSB slots from the bitmap, starting at
 * the indicated starting slot number.
 */
static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
                         unsigned int count)
{
        if (!start)
                return;

        if (cmd_q->lsb == start) {
                /* An entry from the private LSB */
                bitmap_clear(cmd_q->lsbmap, start, count);
        } else {
                /* From the shared LSBs */
                struct ccp_device *ccp = cmd_q->ccp;

                mutex_lock(&ccp->sb_mutex);
                bitmap_clear(ccp->lsbmap, start, count);
                ccp->sb_avail = 1;
                mutex_unlock(&ccp->sb_mutex);
                wake_up_interruptible_all(&ccp->sb_queue);
        }
}

/* CCP version 5: Union to define the function field (cmd_reg1/dword0)
 *
 * Each engine-specific struct is a bitfield view that overlays the same
 * 16-bit value as @raw. Callers in this file zero @raw, fill in the view
 * for the engine in use and then store @raw in the descriptor's function
 * field.
 */
union ccp_function {
        /* View used by ccp5_perform_aes() */
        struct {
                u16 size:7;
                u16 encrypt:1;
                u16 mode:5;
                u16 type:2;
        } aes;
        /* View used by ccp5_perform_xts_aes(); no mode field */
        struct {
                u16 size:7;
                u16 encrypt:1;
                u16 rsvd:5;
                u16 type:2;
        } aes_xts;
        /* View used by ccp5_perform_des3(); same shape as the AES view */
        struct {
                u16 size:7;
                u16 encrypt:1;
                u16 mode:5;
                u16 type:2;
        } des3;
        /* View used by ccp5_perform_sha(); only type is written */
        struct {
                u16 rsvd1:10;
                u16 type:4;
                u16 rsvd2:1;
        } sha;
        /* View used by ccp5_perform_rsa(); only size is written */
        struct {
                u16 mode:3;
                u16 size:12;
        } rsa;
        /* View used by ccp5_perform_passthru() */
        struct {
                u16 byteswap:2;
                u16 bitwise:3;
                u16 reflect:2;
                u16 rsvd:8;
        } pt;
        /* Not written by any code visible in this part of the file */
        struct  {
                u16 rsvd:13;
        } zlib;
        /* View used by ccp5_perform_ecc(); only mode is written */
        struct {
                u16 size:10;
                u16 type:2;
                u16 mode:3;
        } ecc;
        /* The whole function field as a single value */
        u16 raw;
};

/* Accessors for the engine-specific views of union ccp_function.
 * @p is a pointer to the union; each macro expands to an lvalue.
 */
#define CCP_AES_SIZE(p)         ((p)->aes.size)
#define CCP_AES_ENCRYPT(p)      ((p)->aes.encrypt)
#define CCP_AES_MODE(p)         ((p)->aes.mode)
#define CCP_AES_TYPE(p)         ((p)->aes.type)
#define CCP_XTS_SIZE(p)         ((p)->aes_xts.size)
#define CCP_XTS_TYPE(p)         ((p)->aes_xts.type)
#define CCP_XTS_ENCRYPT(p)      ((p)->aes_xts.encrypt)
#define CCP_DES3_SIZE(p)        ((p)->des3.size)
#define CCP_DES3_ENCRYPT(p)     ((p)->des3.encrypt)
#define CCP_DES3_MODE(p)        ((p)->des3.mode)
#define CCP_DES3_TYPE(p)        ((p)->des3.type)
#define CCP_SHA_TYPE(p)         ((p)->sha.type)
#define CCP_RSA_SIZE(p)         ((p)->rsa.size)
#define CCP_PT_BYTESWAP(p)      ((p)->pt.byteswap)
#define CCP_PT_BITWISE(p)       ((p)->pt.bitwise)
#define CCP_ECC_MODE(p)         ((p)->ecc.mode)
/* NOTE(review): the ecc view of union ccp_function has no member named
 * "one", so this macro would not compile if it were ever expanded.
 */
#define CCP_ECC_AFFINE(p)       ((p)->ecc.one)

/* Accessors for the fields of a struct ccp5_desc. @p is a pointer to the
 * descriptor; each macro expands to an lvalue.
 */

/* Word 0 */
#define CCP5_CMD_DW0(p)         ((p)->dw0)
#define CCP5_CMD_SOC(p)         (CCP5_CMD_DW0(p).soc)
#define CCP5_CMD_IOC(p)         (CCP5_CMD_DW0(p).ioc)
#define CCP5_CMD_INIT(p)        (CCP5_CMD_DW0(p).init)
#define CCP5_CMD_EOM(p)         (CCP5_CMD_DW0(p).eom)
#define CCP5_CMD_FUNCTION(p)    (CCP5_CMD_DW0(p).function)
#define CCP5_CMD_ENGINE(p)      (CCP5_CMD_DW0(p).engine)
#define CCP5_CMD_PROT(p)        (CCP5_CMD_DW0(p).prot)

/* Word 1 */
#define CCP5_CMD_DW1(p)         ((p)->length)
#define CCP5_CMD_LEN(p)         (CCP5_CMD_DW1(p))

/* Word 2 */
#define CCP5_CMD_DW2(p)         ((p)->src_lo)
#define CCP5_CMD_SRC_LO(p)      (CCP5_CMD_DW2(p))

/* Word 3 */
#define CCP5_CMD_DW3(p)         ((p)->dw3)
#define CCP5_CMD_SRC_MEM(p)     ((p)->dw3.src_mem)
#define CCP5_CMD_SRC_HI(p)      ((p)->dw3.src_hi)
#define CCP5_CMD_LSB_ID(p)      ((p)->dw3.lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p)     ((p)->dw3.fixed)

/* Words 4/5: destination address, or the SHA length fields that share
 * dw4/dw5 with it
 */
#define CCP5_CMD_DW4(p)         ((p)->dw4)
#define CCP5_CMD_DST_LO(p)      (CCP5_CMD_DW4(p).dst_lo)
#define CCP5_CMD_DW5(p)         ((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p)      (CCP5_CMD_DW5(p))
#define CCP5_CMD_DST_MEM(p)     ((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p)     ((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p)      ((p)->dw4.sha_len_lo)
#define CCP5_CMD_SHA_HI(p)      ((p)->dw5.sha_len_hi)

/* Word 6/7 */
#define CCP5_CMD_DW6(p)         ((p)->key_lo)
#define CCP5_CMD_KEY_LO(p)      (CCP5_CMD_DW6(p))
#define CCP5_CMD_DW7(p)         ((p)->dw7)
#define CCP5_CMD_KEY_HI(p)      ((p)->dw7.key_hi)
#define CCP5_CMD_KEY_MEM(p)     ((p)->dw7.key_mem)

static inline u32 low_address(unsigned long addr)
{
        return (u64)addr & 0x0ffffffff;
}

static inline u32 high_address(unsigned long addr)
{
        return ((u64)addr >> 32) & 0x00000ffff;
}

/* Compute how many descriptor slots of @cmd_q can still be filled.
 *
 * The head index is derived from the head register relative to the low
 * 32 bits of the queue's DMA base; the distance from the driver's write
 * index (qidx) to that head, minus one, is reported modulo the ring size.
 */
static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
{
        u32 ring_base = low_address(cmd_q->qdma_tail);
        u32 hw_head = ioread32(cmd_q->reg_head_lo);
        unsigned int head_slot;
        unsigned int gap;

        head_slot = (hw_head - ring_base) / sizeof(struct ccp5_desc);

        /* One slot is always left unused, hence the "- 1" */
        gap = head_slot + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;

        return gap % COMMANDS_PER_QUEUE;
}

static int ccp5_do_cmd(struct ccp5_desc *desc,
                       struct ccp_cmd_queue *cmd_q)
{
        __le32 *mP;
        u32 *dP;
        u32 tail;
        int     i;
        int ret = 0;

        cmd_q->total_ops++;

        if (CCP5_CMD_SOC(desc)) {
                CCP5_CMD_IOC(desc) = 1;
                CCP5_CMD_SOC(desc) = 0;
        }
        mutex_lock(&cmd_q->q_mutex);

        mP = (__le32 *)&cmd_q->qbase[cmd_q->qidx];
        dP = (u32 *)desc;
        for (i = 0; i < 8; i++)
                mP[i] = cpu_to_le32(dP[i]); /* handle endianness */

        cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;

        /* The data used by this command must be flushed to memory */
        wmb();

        /* Write the new tail address back to the queue register */
        tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
        iowrite32(tail, cmd_q->reg_tail_lo);

        /* Turn the queue back on using our cached control register */
        iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
        mutex_unlock(&cmd_q->q_mutex);

        if (CCP5_CMD_IOC(desc)) {
                /* Wait for the job to complete */
                ret = wait_event_interruptible(cmd_q->int_queue,
                                               cmd_q->int_rcvd);
                if (ret || cmd_q->cmd_error) {
                        /* Log the error and flush the queue by
                         * moving the head pointer
                         */
                        if (cmd_q->cmd_error)
                                ccp_log_error(cmd_q->ccp,
                                              cmd_q->cmd_error);
                        iowrite32(tail, cmd_q->reg_head_lo);
                        if (!ret)
                                ret = -EIO;
                }
                cmd_q->int_rcvd = 0;
        }

        return ret;
}

static int ccp5_perform_aes(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;
        u32 key_addr = op->sb_key * LSB_ITEM_SIZE;

        op->cmd_q->total_aes_ops++;

        /* Zero out all the fields of the command desc */
        memset(&desc, 0, Q_DESC_SIZE);

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;

        CCP5_CMD_SOC(&desc) = op->soc;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = op->init;
        CCP5_CMD_EOM(&desc) = op->eom;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        CCP_AES_ENCRYPT(&function) = op->u.aes.action;
        CCP_AES_MODE(&function) = op->u.aes.mode;
        CCP_AES_TYPE(&function) = op->u.aes.type;
        CCP_AES_SIZE(&function) = op->u.aes.size;

        CCP5_CMD_FUNCTION(&desc) = function.raw;

        CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
        CCP5_CMD_KEY_HI(&desc) = 0;
        CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
        CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

        return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_xts_aes(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;
        u32 key_addr = op->sb_key * LSB_ITEM_SIZE;

        op->cmd_q->total_xts_aes_ops++;

        /* Zero out all the fields of the command desc */
        memset(&desc, 0, Q_DESC_SIZE);

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;

        CCP5_CMD_SOC(&desc) = op->soc;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = op->init;
        CCP5_CMD_EOM(&desc) = op->eom;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        CCP_XTS_TYPE(&function) = op->u.xts.type;
        CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
        CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
        CCP5_CMD_FUNCTION(&desc) = function.raw;

        CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
        CCP5_CMD_KEY_HI(&desc) =  0;
        CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
        CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

        return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_sha(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;

        op->cmd_q->total_sha_ops++;

        /* Zero out all the fields of the command desc */
        memset(&desc, 0, Q_DESC_SIZE);

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;

        CCP5_CMD_SOC(&desc) = op->soc;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = 1;
        CCP5_CMD_EOM(&desc) = op->eom;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        CCP_SHA_TYPE(&function) = op->u.sha.type;
        CCP5_CMD_FUNCTION(&desc) = function.raw;

        CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

        if (op->eom) {
                CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
                CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
        } else {
                CCP5_CMD_SHA_LO(&desc) = 0;
                CCP5_CMD_SHA_HI(&desc) = 0;
        }

        return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_des3(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;
        u32 key_addr = op->sb_key * LSB_ITEM_SIZE;

        op->cmd_q->total_3des_ops++;

        /* Zero out all the fields of the command desc */
        memset(&desc, 0, sizeof(struct ccp5_desc));

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_DES3;

        CCP5_CMD_SOC(&desc) = op->soc;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = op->init;
        CCP5_CMD_EOM(&desc) = op->eom;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        CCP_DES3_ENCRYPT(&function) = op->u.des3.action;
        CCP_DES3_MODE(&function) = op->u.des3.mode;
        CCP_DES3_TYPE(&function) = op->u.des3.type;
        CCP5_CMD_FUNCTION(&desc) = function.raw;

        CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
        CCP5_CMD_KEY_HI(&desc) = 0;
        CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
        CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

        return ccp5_do_cmd(&desc, op->cmd_q);
}

/* Build an RSA descriptor from @op and submit it on op->cmd_q.
 *
 * Source, destination and the exponent (passed through the key fields)
 * are all system-memory DMA buffers. The size written to the function
 * word is op->u.rsa.mod_size rounded up to a multiple of 8 and divided
 * by 8.
 *
 * Return: the result of ccp5_do_cmd().
 */
static int ccp5_perform_rsa(struct ccp_op *op)
{
        struct ccp_dma_info *src = &op->src.u.dma;
        struct ccp_dma_info *dst = &op->dst.u.dma;
        struct ccp_dma_info *exp = &op->exp.u.dma;
        union ccp_function function;
        struct ccp5_desc desc;

        op->cmd_q->total_rsa_ops++;

        /* Function word for the RSA engine */
        function.raw = 0;
        function.rsa.size = (op->u.rsa.mod_size + 7) >> 3;

        /* Start from an all-zero descriptor */
        memset(&desc, 0, Q_DESC_SIZE);

        /* Control word; RSA is always a single, final operation */
        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
        CCP5_CMD_FUNCTION(&desc) = function.raw;
        CCP5_CMD_PROT(&desc) = 0;
        CCP5_CMD_INIT(&desc) = 0;
        CCP5_CMD_EOM(&desc) = 1;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_SOC(&desc) = op->soc;

        CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;

        /* Source is from external memory */
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);

        /* Destination is in external memory */
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
        CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);

        /* Key (Exponent) is in external memory */
        CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
        CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(exp);
        CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(exp);

        return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_passthru(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;
        struct ccp_dma_info *saddr = &op->src.u.dma;
        struct ccp_dma_info *daddr = &op->dst.u.dma;


        op->cmd_q->total_pt_ops++;

        memset(&desc, 0, Q_DESC_SIZE);

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;

        CCP5_CMD_SOC(&desc) = 0;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = 0;
        CCP5_CMD_EOM(&desc) = op->eom;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
        CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
        CCP5_CMD_FUNCTION(&desc) = function.raw;

        /* Length of source data is always 256 bytes */
        if (op->src.type == CCP_MEMTYPE_SYSTEM)
                CCP5_CMD_LEN(&desc) = saddr->length;
        else
                CCP5_CMD_LEN(&desc) = daddr->length;

        if (op->src.type == CCP_MEMTYPE_SYSTEM) {
                CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
                CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
                CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

                if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
                        CCP5_CMD_LSB_ID(&desc) = op->sb_key;
        } else {
                u32 key_addr = op->src.u.sb * CCP_SB_BYTES;

                CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
                CCP5_CMD_SRC_HI(&desc) = 0;
                CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
        }

        if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
                CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
                CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
                CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
        } else {
                u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;

                CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
                CCP5_CMD_DST_HI(&desc) = 0;
                CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
        }

        return ccp5_do_cmd(&desc, op->cmd_q);
}

static int ccp5_perform_ecc(struct ccp_op *op)
{
        struct ccp5_desc desc;
        union ccp_function function;

        op->cmd_q->total_ecc_ops++;

        /* Zero out all the fields of the command desc */
        memset(&desc, 0, Q_DESC_SIZE);

        CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;

        CCP5_CMD_SOC(&desc) = 0;
        CCP5_CMD_IOC(&desc) = 1;
        CCP5_CMD_INIT(&desc) = 0;
        CCP5_CMD_EOM(&desc) = 1;
        CCP5_CMD_PROT(&desc) = 0;

        function.raw = 0;
        function.ecc.mode = op->u.ecc.function;
        CCP5_CMD_FUNCTION(&desc) = function.raw;

        CCP5_CMD_LEN(&desc) = op->src.u.dma.length;

        CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
        CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
        CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;

        return ccp5_do_cmd(&desc, op->cmd_q);
}

/* Record in cmd_q->lsbmask which LSB regions @cmd_q may access.
 *
 * @status is consumed LSB_REGION_WIDTH bits at a time, starting with
 * region 1; within each group the bit at position cmd_q->id decides
 * whether the region is accessible to this queue.
 *
 * Return: 0 when at least one region is accessible, -EINVAL otherwise.
 */
static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
{
        int q_mask = 1 << cmd_q->id;
        int region = 1;
        int accessible;

        /* Segment 0 has special privileges and is never recorded */
        while (region < MAX_LSB_CNT) {
                if (status & q_mask)
                        bitmap_set(cmd_q->lsbmask, region, 1);
                status >>= LSB_REGION_WIDTH;
                region++;
        }

        accessible = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
        dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
                 cmd_q->id, accessible);

        if (!accessible)
                return -EINVAL;

        return 0;
}

/* Give a private LSB to every queue that can access exactly @lsb_cnt
 * LSB regions.
 *
 * For each such queue the first region that is both accessible to the
 * queue and still set in @lsb_pub is assigned to it and cleared from
 * @lsb_pub.
 *
 * Return: @n_lsbs minus the number of assignments made, or -EINVAL when
 * a matching queue could not be given any region.
 */
static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
                                        int lsb_cnt, int n_lsbs,
                                        unsigned long *lsb_pub)
{
        DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
        int bitno;
        int i;

        for (i = 0; i < ccp->cmd_q_count; i++) {
                struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];

                /* Only queues with exactly lsb_cnt choices this round */
                if (bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT) != lsb_cnt)
                        continue;

                /* Work on a copy so rejected candidates can be dropped */
                bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);

                for (bitno = find_first_bit(qlsb, MAX_LSB_CNT);
                     bitno < MAX_LSB_CNT;
                     bitno = find_first_bit(qlsb, MAX_LSB_CNT)) {
                        if (test_bit(bitno, lsb_pub)) {
                                /* Still available: claim it and remove
                                 * it from the aggregate mask
                                 */
                                cmd_q->lsb = bitno;
                                bitmap_clear(lsb_pub, bitno, 1);
                                dev_dbg(ccp->dev,
                                         "Queue %d gets LSB %d\n",
                                         i, bitno);
                                break;
                        }

                        /* Already taken: discard and try the next one */
                        bitmap_clear(qlsb, bitno, 1);
                }

                /* Ran out of candidates without claiming anything */
                if (bitno >= MAX_LSB_CNT)
                        return -EINVAL;

                n_lsbs--;
        }

        return n_lsbs;
}

/* Distribute LSB regions among the queues of @ccp.
 *
 * When there are at least as many accessible regions as queues, queues
 * are served from the most- to the least-constrained (fewest accessible
 * regions first) and each receives a private region. Whatever remains in
 * the aggregate mask afterwards is shared; every region that is not
 * shared has all of its slots marked "in use" in the device-wide map.
 * With fewer regions than queues nothing is assigned privately.
 *
 * Return: 0 on success, -EINVAL when a queue could not be given a region.
 */
static int ccp_assign_lsbs(struct ccp_device *ccp)
{
        DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
        DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
        int n_lsbs;
        int lsb_cnt;
        int bitno;
        int rc;
        int i;

        /* Union of every queue's accessible regions */
        bitmap_zero(lsb_pub, MAX_LSB_CNT);
        for (i = 0; i < ccp->cmd_q_count; i++)
                bitmap_or(lsb_pub, lsb_pub, ccp->cmd_q[i].lsbmask,
                          MAX_LSB_CNT);

        n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);

        if (n_lsbs >= ccp->cmd_q_count) {
                /* Enough regions for one each: walk the possible
                 * "number of choices" values upwards so the most
                 * constrained queues pick first.
                 */
                lsb_cnt = 1;
                while (n_lsbs && lsb_cnt <= MAX_LSB_CNT) {
                        rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
                                                          lsb_pub);
                        if (rc < 0)
                                return -EINVAL;

                        n_lsbs = rc;
                        lsb_cnt++;
                }
        }

        /* Zero bits in lsb_pub are regions that cannot be shared; reserve
         * all of their slots in the device-wide map. The scratch copy is
         * filled in as we go so each zero bit is visited once.
         */
        bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
        for (bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
             bitno < MAX_LSB_CNT;
             bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT)) {
                bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
                bitmap_set(qlsb, bitno, 1);
        }

        return 0;
}

static void ccp5_disable_queue_interrupts(struct ccp_device *ccp)
{
        unsigned int i;

        for (i = 0; i < ccp->cmd_q_count; i++)
                iowrite32(0x0, ccp->cmd_q[i].reg_int_enable);
}

static void ccp5_enable_queue_interrupts(struct ccp_device *ccp)
{
        unsigned int i;

        for (i = 0; i < ccp->cmd_q_count; i++)
                iowrite32(SUPPORTED_INTERRUPTS, ccp->cmd_q[i].reg_int_enable);
}

/* Bottom half of the interrupt handler.
 *
 * @data is the struct ccp_device pointer cast to unsigned long. Every
 * queue with a non-zero interrupt status has its status registers
 * captured, its first error (if any) latched, the interrupt acknowledged
 * and its waiter woken. Queue interrupts are re-enabled at the end.
 */
static void ccp5_irq_bh(unsigned long data)
{
        struct ccp_device *ccp = (struct ccp_device *)data;
        unsigned int q;

        for (q = 0; q < ccp->cmd_q_count; q++) {
                struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[q];
                u32 pending = ioread32(cmd_q->reg_interrupt_status);

                /* Nothing raised on this queue */
                if (!pending)
                        continue;

                cmd_q->int_status = pending;
                cmd_q->q_status = ioread32(cmd_q->reg_status);
                cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

                /* Keep only the first error until it is consumed */
                if (!cmd_q->cmd_error && (pending & INT_ERROR))
                        cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

                cmd_q->int_rcvd = 1;

                /* Acknowledge what we saw, then wake the waiter */
                iowrite32(pending, cmd_q->reg_interrupt_status);
                wake_up_interruptible(&cmd_q->int_queue);
        }

        ccp5_enable_queue_interrupts(ccp);
}

/* Top half of the interrupt handler.
 *
 * Queue interrupts are disabled and the interrupt counter is bumped; the
 * real work is done by ccp5_irq_bh(), either through the tasklet or by a
 * direct call, depending on ccp->use_tasklet.
 *
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t ccp5_irq_handler(int irq, void *data)
{
        struct ccp_device *ccp = data;

        ccp5_disable_queue_interrupts(ccp);
        ccp->total_interrupts++;

        if (!ccp->use_tasklet) {
                ccp5_irq_bh((unsigned long)ccp);
                return IRQ_HANDLED;
        }

        tasklet_schedule(&ccp->irq_tasklet);
        return IRQ_HANDLED;
}

static int ccp5_init(struct ccp_device *ccp)
{
        struct device *dev = ccp->dev;
        struct ccp_cmd_queue *cmd_q;
        struct dma_pool *dma_pool;
        char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
        unsigned int qmr, i;
        u64 status;
        u32 status_lo, status_hi;
        int ret;

        /* Find available queues */
        qmr = ioread32(ccp->io_regs + Q_MASK_REG);
        /*
         * Check for a access to the registers.  If this read returns
         * 0xffffffff, it's likely that the system is running a broken
         * BIOS which disallows access to the device. Stop here and fail
         * the initialization (but not the load, as the PSP could get
         * properly initialized).
         */
        if (qmr == 0xffffffff) {
                dev_notice(dev, "ccp: unable to access the device: you might be running a broken BIOS.\n");
                return 1;
        }

        for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) {
                if (!(qmr & (1 << i)))
                        continue;

                /* Allocate a dma pool for this queue */
                snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
                         ccp->name, i);
                dma_pool = dma_pool_create(dma_pool_name, dev,
                                           CCP_DMAPOOL_MAX_SIZE,
                                           CCP_DMAPOOL_ALIGN, 0);
                if (!dma_pool) {
                        dev_err(dev, "unable to allocate dma pool\n");
                        ret = -ENOMEM;
                        goto e_pool;
                }

                cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
                ccp->cmd_q_count++;

                cmd_q->ccp = ccp;
                cmd_q->id = i;
                cmd_q->dma_pool = dma_pool;
                mutex_init(&cmd_q->q_mutex);

                /* Page alignment satisfies our needs for N <= 128 */
                BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
                cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
                cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize,
                                                   &cmd_q->qbase_dma,
                                                   GFP_KERNEL);
                if (!cmd_q->qbase) {
                        dev_err(dev, "unable to allocate command queue\n");
                        ret = -ENOMEM;
                        goto e_pool;
                }

                cmd_q->qidx = 0;
                /* Preset some register values and masks that are queue
                 * number dependent
                 */
                cmd_q->reg_control = ccp->io_regs +
                                     CMD5_Q_STATUS_INCR * (i + 1);
                cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
                cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
                cmd_q->reg_int_enable = cmd_q->reg_control +
                                        CMD5_Q_INT_ENABLE_BASE;
                cmd_q->reg_interrupt_status = cmd_q->reg_control +
                                              CMD5_Q_INTERRUPT_STATUS_BASE;
                cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
                cmd_q->reg_int_status = cmd_q->reg_control +
                                        CMD5_Q_INT_STATUS_BASE;
                cmd_q->reg_dma_status = cmd_q->reg_control +
                                        CMD5_Q_DMA_STATUS_BASE;
                cmd_q->reg_dma_read_status = cmd_q->reg_control +
                                             CMD5_Q_DMA_READ_STATUS_BASE;
                cmd_q->reg_dma_write_status = cmd_q->reg_control +
                                              CMD5_Q_DMA_WRITE_STATUS_BASE;

                init_waitqueue_head(&cmd_q->int_queue);

                dev_dbg(dev, "queue #%u available\n", i);
        }

        if (ccp->cmd_q_count == 0) {
                dev_notice(dev, "no command queues available\n");
                ret = 1;
                goto e_pool;
        }

        /* Turn off the queues and disable interrupts until ready */
        ccp5_disable_queue_interrupts(ccp);
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];

                cmd_q->qcontrol = 0; /* Start with nothing */
                iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

                ioread32(cmd_q->reg_int_status);
                ioread32(cmd_q->reg_status);

                /* Clear the interrupt status */
                iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
        }

        dev_dbg(dev, "Requesting an IRQ...\n");
        /* Request an irq */
        ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp);
        if (ret) {
                dev_err(dev, "unable to allocate an IRQ\n");
                goto e_pool;
        }
        /* Initialize the ISR tasklet */
        if (ccp->use_tasklet)
                tasklet_init(&ccp->irq_tasklet, ccp5_irq_bh,
                             (unsigned long)ccp);

        dev_dbg(dev, "Loading LSB map...\n");
        /* Copy the private LSB mask to the public registers */
        status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
        status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
        iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
        iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
        status = ((u64)status_hi<<30) | (u64)status_lo;

        dev_dbg(dev, "Configuring virtual queues...\n");
        /* Configure size of each virtual queue accessible to host */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                u32 dma_addr_lo;
                u32 dma_addr_hi;

                cmd_q = &ccp->cmd_q[i];

                cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
                cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;

                cmd_q->qdma_tail = cmd_q->qbase_dma;
                dma_addr_lo = low_address(cmd_q->qdma_tail);
                iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
                iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);

                dma_addr_hi = high_address(cmd_q->qdma_tail);
                cmd_q->qcontrol |= (dma_addr_hi << 16);
                iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

                /* Find the LSB regions accessible to the queue */
                ccp_find_lsb_regions(cmd_q, status);
                cmd_q->lsb = -1; /* Unassigned value */
        }

        dev_dbg(dev, "Assigning LSBs...\n");
        ret = ccp_assign_lsbs(ccp);
        if (ret) {
                dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
                goto e_irq;
        }

        /* Optimization: pre-allocate LSB slots for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
                ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
        }

        dev_dbg(dev, "Starting threads...\n");
        /* Create a kthread for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                struct task_struct *kthread;

                cmd_q = &ccp->cmd_q[i];

                kthread = kthread_run(ccp_cmd_queue_thread, cmd_q,
                                      "%s-q%u", ccp->name, cmd_q->id);
                if (IS_ERR(kthread)) {
                        dev_err(dev, "error creating queue thread (%ld)\n",
                                PTR_ERR(kthread));
                        ret = PTR_ERR(kthread);
                        goto e_kthread;
                }

                cmd_q->kthread = kthread;
        }

        dev_dbg(dev, "Enabling interrupts...\n");
        ccp5_enable_queue_interrupts(ccp);

        dev_dbg(dev, "Registering device...\n");
        /* Put this on the unit list to make it available */
        ccp_add_device(ccp);

        ret = ccp_register_rng(ccp);
        if (ret)
                goto e_kthread;

        /* Register the DMA engine support */
        ret = ccp_dmaengine_register(ccp);
        if (ret)
                goto e_hwrng;

#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS
        /* Set up debugfs entries */
        ccp5_debugfs_setup(ccp);
#endif

        return 0;

e_hwrng:
        ccp_unregister_rng(ccp);

e_kthread:
        for (i = 0; i < ccp->cmd_q_count; i++)
                if (ccp->cmd_q[i].kthread)
                        kthread_stop(ccp->cmd_q[i].kthread);

e_irq:
        sp_free_ccp_irq(ccp->sp, ccp);

e_pool:
        for (i = 0; i < ccp->cmd_q_count; i++)
                dma_pool_destroy(ccp->cmd_q[i].dma_pool);

        return ret;
}

/* Complete every command still linked on @pending with -ENODEV.
 *
 * Each command is unlinked from the list before its callback is invoked,
 * so the loop ends once the list has been drained.
 */
static void ccp5_fail_pending_cmds(struct list_head *pending)
{
        struct ccp_cmd *cmd;

        while (!list_empty(pending)) {
                cmd = list_first_entry(pending, struct ccp_cmd, entry);
                list_del(&cmd->entry);
                cmd->callback(cmd->data, -ENODEV);
        }
}

/* Tear down a version 5 CCP device.
 *
 * The steps run in a fixed order: unregister the DMA engine and the RNG,
 * take the device off the unit list, optionally clean up debugfs, quiesce
 * every command queue, stop the queue kthreads, release the interrupt and
 * finally fail any command still sitting on the cmd or backlog lists.
 */
static void ccp5_destroy(struct ccp_device *ccp)
{
        struct ccp_cmd_queue *cmd_q;
        unsigned int i;

        /* Withdraw the DMA engine and RNG registrations */
        ccp_dmaengine_unregister(ccp);
        ccp_unregister_rng(ccp);

        /* Take this device off the list of available units */
        ccp_del_device(ccp);

#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS
        /* The whole driver is being torn down; once all of the
         * devices are gone the debugfs entries are cleaned up too
         */
        if (ccp_present())
                ccp5_debugfs_destroy();
#endif

        /* Mask the queue interrupts, then quiesce each queue in turn */
        ccp5_disable_queue_interrupts(ccp);
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];

                /* Rewrite the control register with the run bit cleared */
                iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);

                /* Acknowledge every supported interrupt, then read back
                 * the interrupt-status and status registers (the values
                 * read are discarded)
                 */
                iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
                ioread32(cmd_q->reg_int_status);
                ioread32(cmd_q->reg_status);
        }

        /* Stop the kthread of every queue that has one */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                cmd_q = &ccp->cmd_q[i];
                if (!cmd_q->kthread)
                        continue;

                kthread_stop(cmd_q->kthread);
        }

        sp_free_ccp_irq(ccp->sp, ccp);

        /* Fail whatever is left: first the cmd list, then the backlog */
        ccp5_fail_pending_cmds(&ccp->cmd);
        ccp5_fail_pending_cmds(&ccp->backlog);
}

/* Setup hook for the v5 CCP: public side.
 *
 * Writes zero to the request ID configuration register of @ccp.
 */
static void ccp5_config(struct ccp_device *ccp)
{
        iowrite32(0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
}

/* Setup hook for the v5 CCP variant described by ccpv5b.
 *
 * Programs the TRNG control, configuration, AES mask, queue mask/priority,
 * command timeout, private LSB mask and clock gating registers with fixed
 * values, then applies the plain ccp5_config() setup.
 */
static void ccp5other_config(struct ccp_device *ccp)
{
        unsigned int remaining;

        /* We own all of the queues on the NTB CCP */

        iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
        iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);

        /* Feed twelve successive TRNG output words to the AES mask register */
        for (remaining = 12; remaining > 0; remaining--)
                iowrite32(ioread32(ccp->io_regs + TRNG_OUT_REG),
                          ccp->io_regs + CMD5_AES_MASK_OFFSET);

        /* Queue mask and priority; the command timeout is written as zero */
        iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
        iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
        iowrite32(0, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);

        /* Private LSB mask, low word first and then the high word */
        iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
        iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);

        iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);

        /* Finish with the plain v5 setup */
        ccp5_config(ccp);
}

/* Version 5 adds some functions, but is essentially the same as v3 */
static const struct ccp_actions ccp5_actions = {
        /* Per-engine operation handlers */
        .aes            = ccp5_perform_aes,
        .xts_aes        = ccp5_perform_xts_aes,
        .sha            = ccp5_perform_sha,
        .des3           = ccp5_perform_des3,
        .rsa            = ccp5_perform_rsa,
        .passthru       = ccp5_perform_passthru,
        .ecc            = ccp5_perform_ecc,

        /* Storage block slots are served from the LSB allocator */
        .sballoc        = ccp_lsb_alloc,
        .sbfree         = ccp_lsb_free,

        /* Device bring-up and teardown, and queue slot accounting */
        .init           = ccp5_init,
        .destroy        = ccp5_destroy,
        .get_free_slots = ccp5_get_free_slots,
};

/* Version data for a 5.0 CCP that is set up through ccp5_config() */
const struct ccp_vdata ccpv5a = {
        .version        = CCP_VERSION(5, 0),
        .setup          = ccp5_config,
        .perform        = &ccp5_actions,
        .offset         = 0,
        .rsamax         = CCP5_RSA_MAX_WIDTH,
};

/* Version data for a 5.0 CCP that is set up through ccp5other_config()
 * and whose DMA channel attribute is DMA_PRIVATE
 */
const struct ccp_vdata ccpv5b = {
        .version        = CCP_VERSION(5, 0),
        .dma_chan_attr  = DMA_PRIVATE,
        .setup          = ccp5other_config,
        .perform        = &ccp5_actions,
        .offset         = 0,
        .rsamax         = CCP5_RSA_MAX_WIDTH,
};