root/drivers/infiniband/sw/siw/siw_qp_rx.c
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

/*
 * siw_rx_umem()
 *
 * Place @len bytes, read from the current skb position, into the user
 * memory starting at @dest_addr. The target is walked page by page.
 *
 * @srx:        Receive Context
 * @umem:       siw representation of target memory
 * @dest_addr:  user virtual address
 * @len:        number of bytes to place
 *
 * Returns the number of bytes placed, or -EFAULT if a target page cannot
 * be resolved or copying out of the skb fails. On every exit path the
 * skb_copied/skb_new counters are advanced by the bytes placed so far.
 */
static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
                       u64 dest_addr, int len)
{
        int placed = 0;
        int status = 0;

        while (len) {
                struct page *pg = siw_get_upage(umem, dest_addr);
                int in_page, chunk, err;
                void *vaddr;

                if (unlikely(!pg)) {
                        pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
                                __func__, qp_id(rx_qp(srx)),
                                (void *)(uintptr_t)dest_addr,
                                (void *)(uintptr_t)umem->fp_addr);
                        /* siw internal error */
                        status = -EFAULT;
                        break;
                }
                /* never cross a page boundary within one copy */
                in_page = dest_addr & ~PAGE_MASK;
                chunk = min(len, (int)PAGE_SIZE - in_page);

                siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", pg, chunk);

                vaddr = kmap_atomic(pg);
                err = skb_copy_bits(srx->skb, srx->skb_offset,
                                    vaddr + in_page, chunk);
                if (unlikely(err)) {
                        kunmap_atomic(vaddr);

                        pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
                                qp_id(rx_qp(srx)), __func__, len, pg, err);

                        status = -EFAULT;
                        break;
                }
                if (srx->mpa_crc_enabled &&
                    rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
                        /* kernel client: CRC over the just written target */
                        siw_crc_update(&srx->mpa_crc, vaddr + in_page, chunk);
                        kunmap_atomic(vaddr);
                } else {
                        kunmap_atomic(vaddr);
                        /*
                         * User client: compute the CRC over the original
                         * skb data, not over the target buffer. A user
                         * land application may concurrently write the
                         * target buffer, which would yield a broken CRC.
                         * Walking the skb twice is very inefficient;
                         * folding the CRC into skb_copy_bits() would be
                         * much better, but is currently not supported.
                         */
                        if (srx->mpa_crc_enabled)
                                siw_crc_skb(srx, chunk);
                }
                srx->skb_offset += chunk;
                placed += chunk;
                len -= chunk;
                dest_addr += chunk;
        }
        srx->skb_copied += placed;
        srx->skb_new -= placed;

        return status ? status : placed;
}

/*
 * siw_rx_kva()
 *
 * Copy @len bytes from the current skb read position to the kernel
 * virtual address @kva and, if MPA CRC is enabled, fold the copied data
 * into the running CRC.
 *
 * Returns @len on success. If skb_copy_bits() fails, its non-zero result
 * is returned and the receive stream counters are left untouched.
 */
static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
{
        int err;

        siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);

        err = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
        if (likely(!err)) {
                if (srx->mpa_crc_enabled)
                        siw_crc_update(&srx->mpa_crc, kva, len);

                srx->skb_offset += len;
                srx->skb_copied += len;
                srx->skb_new -= len;

                return len;
        }
        pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
                qp_id(rx_qp(srx)), __func__, len, kva, err);

        return err;
}

/*
 * siw_rx_pbl()
 *
 * Place @len bytes into memory described by the physical buffer list of
 * @mem, starting at address @addr. Data is placed one PBL buffer at a
 * time via siw_rx_kva(); @pbl_idx is handed to siw_pbl_get_buffer() for
 * each lookup.
 *
 * Returns the number of bytes placed. This is less than @len if a buffer
 * lookup yields no address or a copy does not place the full chunk.
 */
static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
                      struct siw_mem *mem, u64 addr, int len)
{
        struct siw_pbl *pbl = mem->pbl;
        u64 pbl_off = addr - mem->va;
        int left = len;

        while (left) {
                int chunk;
                dma_addr_t buf =
                        siw_pbl_get_buffer(pbl, pbl_off, &chunk, pbl_idx);

                if (!buf)
                        break;

                chunk = min(chunk, left);
                if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf), chunk) != chunk)
                        break;

                pbl_off += chunk;
                left -= chunk;
        }
        return len - left;
}

/*
 * siw_rresp_check_ntoh()
 *
 * Validate the header of an inbound RRESP DDP segment against the
 * receive state. For the first DDP segment of a message, the expected
 * STag and target offset are loaded from the first SGE of the active
 * WQE before checking.
 *
 * NOTE: This function must be called only if a RRESP DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 *
 * Returns 0 if the header matches. Otherwise a DDP tagged buffer
 * TERMINATE is initialized and -EINVAL is returned.
 */
static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
                                struct siw_rx_fpdu *frx)
{
        struct iwarp_rdma_rresp *hdr = &srx->hdr.rresp;
        struct siw_wqe *wqe = &frx->wqe_active;
        u32 stag = be32_to_cpu(hdr->sink_stag);
        u64 to = be64_to_cpu(hdr->sink_to);
        enum ddp_ecode ecode;

        if (frx->first_ddp_seg) {
                srx->ddp_stag = wqe->sqe.sge[0].lkey;
                srx->ddp_to = wqe->sqe.sge[0].laddr;
                frx->pbl_idx = 0;
        }
        /*
         * The checks below go beyond DDP semantics and into RDMAP:
         * the read response must match exactly the read request
         * which was sent to the remote peer to trigger it.
         * RFC5040/5041 do not always provide a proper error code for
         * the detected error cases. 'Base or bounds error' is chosen
         * where the inbound STag is valid, but offset or length do
         * not match our response receive state.
         */
        if (unlikely(srx->ddp_stag != stag)) {
                pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
                        qp_id(rx_qp(srx)), stag, srx->ddp_stag);
                ecode = DDP_ECODE_T_INVALID_STAG;
        } else if (unlikely(srx->ddp_to != to)) {
                pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
                        qp_id(rx_qp(srx)), (unsigned long long)to,
                        (unsigned long long)srx->ddp_to);
                ecode = DDP_ECODE_T_BASE_BOUNDS;
        } else if (unlikely(!frx->more_ddp_segs &&
                            (wqe->processed + srx->fpdu_part_rem !=
                             wqe->bytes))) {
                /* last segment must complete the requested length */
                pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
                        qp_id(rx_qp(srx)),
                        wqe->processed + srx->fpdu_part_rem, wqe->bytes);
                ecode = DDP_ECODE_T_BASE_BOUNDS;
        } else {
                return 0;
        }
        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
                           DDP_ETYPE_TAGGED_BUF, ecode, 0);
        return -EINVAL;
}

/*
 * siw_write_check_ntoh()
 *
 * Validate the header of an inbound WRITE DDP segment. The first DDP
 * segment of a message defines the expected STag and target offset;
 * any later segment must match the values kept in the receive state.
 *
 * NOTE: This function must be called only if a WRITE DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 *
 * Returns 0 if the header is accepted. Otherwise a DDP tagged buffer
 * TERMINATE is initialized and -EINVAL is returned.
 */
static int siw_write_check_ntoh(struct siw_rx_stream *srx,
                                struct siw_rx_fpdu *frx)
{
        struct iwarp_rdma_write *hdr = &srx->hdr.rwrite;
        u32 stag = be32_to_cpu(hdr->sink_stag);
        u64 to = be64_to_cpu(hdr->sink_to);
        enum ddp_ecode ecode;

        if (frx->first_ddp_seg) {
                /* nothing to compare against yet: record expectations */
                srx->ddp_stag = stag;
                srx->ddp_to = to;
                frx->pbl_idx = 0;

                return 0;
        }
        if (unlikely(srx->ddp_stag != stag)) {
                pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
                        qp_id(rx_qp(srx)), stag,
                        srx->ddp_stag);
                ecode = DDP_ECODE_T_INVALID_STAG;
        } else if (unlikely(srx->ddp_to != to)) {
                pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
                        qp_id(rx_qp(srx)),
                        (unsigned long long)to,
                        (unsigned long long)srx->ddp_to);
                ecode = DDP_ECODE_T_BASE_BOUNDS;
        } else {
                return 0;
        }
        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
                           DDP_ETYPE_TAGGED_BUF, ecode, 0);
        return -EINVAL;
}

/*
 * siw_send_check_ntoh()
 *
 * Check incoming SEND fragment header against expected
 * header values and update expected MSN if no next
 * fragment expected
 *
 * NOTE: This function must be called only if a SEND DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_send_check_ntoh(struct siw_rx_stream *srx,
                               struct siw_rx_fpdu *frx)
{
        struct iwarp_send_inv *send = &srx->hdr.send_inv;
        struct siw_wqe *wqe = &frx->wqe_active;
        enum ddp_ecode ecode;

        u32 ddp_msn = be32_to_cpu(send->ddp_msn);
        u32 ddp_mo = be32_to_cpu(send->ddp_mo);
        u32 ddp_qn = be32_to_cpu(send->ddp_qn);

        if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
                pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
                        qp_id(rx_qp(srx)), ddp_qn);
                ecode = DDP_ECODE_UT_INVALID_QN;
                goto error;
        }
        if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
                pr_warn("siw: [QP %u]: send msn: %u != %u\n",
                        qp_id(rx_qp(srx)), ddp_msn,
                        srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
                ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
                goto error;
        }
        if (unlikely(ddp_mo != wqe->processed)) {
                pr_warn("siw: [QP %u], send mo: %u != %u\n",
                        qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
                ecode = DDP_ECODE_UT_INVALID_MO;
                goto error;
        }
        if (frx->first_ddp_seg) {
                /* initialize user memory write position */
                frx->sge_idx = 0;
                frx->sge_off = 0;
                frx->pbl_idx = 0;

                /* only valid for SEND_INV and SEND_SE_INV operations */
                srx->inval_stag = be32_to_cpu(send->inval_stag);
        }
        if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
                siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
                           wqe->bytes, wqe->processed, srx->fpdu_part_rem);
                wqe->wc_status = SIW_WC_LOC_LEN_ERR;
                ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
                goto error;
        }
        return 0;
error:
        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
                           DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
        return -EINVAL;
}

/*
 * siw_rqe_get()
 *
 * Fetch the next receive queue element and copy it into the QP's
 * untagged receive WQE. The element is taken from the SRQ if the QP
 * has one attached (under the SRQ lock), from the QP's own receive
 * queue otherwise.
 *
 * Returns the initialized receive WQE, or NULL if
 *  - the SRQ has no entries (num_rqe is zero) or the QP has no
 *    receive queue,
 *  - the current element is not flagged SIW_WQE_VALID, or
 *  - the element carries more than SIW_MAX_SGE SGEs.
 *
 * If an armed SRQ reaches its limit, the SRQ is disarmed and
 * IB_EVENT_SRQ_LIMIT_REACHED is raised after the lock got released.
 */
static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
{
        struct siw_rqe *rqe;
        struct siw_srq *srq;
        struct siw_wqe *wqe = NULL;
        bool srq_event = false;
        unsigned long flags;

        srq = qp->srq;
        if (srq) {
                /* lock is dropped at 'out' or on the early error return */
                spin_lock_irqsave(&srq->lock, flags);
                if (unlikely(!srq->num_rqe))
                        goto out;

                rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
        } else {
                if (unlikely(!qp->recvq))
                        goto out;

                rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
        }
        if (likely(rqe->flags == SIW_WQE_VALID)) {
                int num_sge = rqe->num_sge;

                if (likely(num_sge <= SIW_MAX_SGE)) {
                        int i = 0;

                        wqe = rx_wqe(&qp->rx_untagged);
                        rx_type(wqe) = SIW_OP_RECEIVE;
                        wqe->wr_status = SIW_WR_INPROGRESS;
                        wqe->bytes = 0;
                        wqe->processed = 0;

                        wqe->rqe.id = rqe->id;
                        wqe->rqe.num_sge = num_sge;

                        /*
                         * Copy all SGEs, sum up the total receive space
                         * in wqe->bytes and reset the per-SGE memory
                         * pointers.
                         */
                        while (i < num_sge) {
                                wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
                                wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
                                wqe->rqe.sge[i].length = rqe->sge[i].length;
                                wqe->bytes += wqe->rqe.sge[i].length;
                                wqe->mem[i] = NULL;
                                i++;
                        }
                        /* can be re-used by appl */
                        smp_store_mb(rqe->flags, 0);
                } else {
                        /*
                         * Malformed element: leave it in the queue
                         * (rq_get is not advanced) and fail.
                         */
                        siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
                        if (srq)
                                spin_unlock_irqrestore(&srq->lock, flags);
                        return NULL;
                }
                if (!srq) {
                        qp->rq_get++;
                } else {
                        if (srq->armed) {
                                /* Test SRQ limit */
                                u32 off = (srq->rq_get + srq->limit) %
                                          srq->num_rqe;
                                struct siw_rqe *rqe2 = &srq->recvq[off];

                                /*
                                 * The entry 'limit' slots ahead is not
                                 * valid: disarm and signal after unlock.
                                 */
                                if (!(rqe2->flags & SIW_WQE_VALID)) {
                                        srq->armed = false;
                                        srq_event = true;
                                }
                        }
                        srq->rq_get++;
                }
        }
out:
        if (srq) {
                spin_unlock_irqrestore(&srq->lock, flags);
                /* event is raised outside of the SRQ lock */
                if (srq_event)
                        siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
        }
        return wqe;
}

/*
 * siw_rx_data()
 *
 * Place @bytes bytes of inbound data at @addr within memory @mem_p,
 * selecting the placement routine by the kind of memory:
 *  - no memory object attached: @addr is converted via
 *    ib_virt_dma_to_ptr() and written directly,
 *  - PBL based memory: siw_rx_pbl(), using @pbl_idx,
 *  - otherwise: user memory, siw_rx_umem().
 *
 * Returns whatever the selected placement routine returns.
 */
static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx,
                       unsigned int *pbl_idx, u64 addr, int bytes)
{
        if (!mem_p->mem_obj)
                return siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes);

        if (mem_p->is_pbl)
                return siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes);

        return siw_rx_umem(srx, mem_p->umem, addr, bytes);
}

/*
 * siw_proc_send:
 *
 * Process one incoming SEND and place data into memory referenced by
 * receive wqe.
 *
 * Function supports partially received sends (suspending/resuming
 * current receive wqe processing)
 *
 * return value:
 *      0:       reached the end of a DDP segment
 *      -EAGAIN: to be called again to finish the DDP segment
 *      -ENOENT: no receive WQE available for the first DDP segment
 *      -EINVAL: data placement did not place the expected byte count
 *      other negative values: failed header check
 *               (siw_send_check_ntoh()) or failed SGE check
 *               (siw_check_sge())
 */
int siw_proc_send(struct siw_qp *qp)
{
        struct siw_rx_stream *srx = &qp->rx_stream;
        struct siw_rx_fpdu *frx = &qp->rx_untagged;
        struct siw_wqe *wqe;
        u32 data_bytes; /* all data bytes available */
        u32 rcvd_bytes; /* sum of data bytes rcvd */
        int rv = 0;

        /*
         * The first DDP segment of a message consumes a new receive
         * queue element; later segments continue with the active WQE.
         */
        if (frx->first_ddp_seg) {
                wqe = siw_rqe_get(qp);
                if (unlikely(!wqe)) {
                        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                           DDP_ETYPE_UNTAGGED_BUF,
                                           DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
                        return -ENOENT;
                }
        } else {
                wqe = rx_wqe(frx);
        }
        /* header is checked once per DDP segment only */
        if (srx->state == SIW_GET_DATA_START) {
                rv = siw_send_check_ntoh(srx, frx);
                if (unlikely(rv)) {
                        siw_qp_event(qp, IB_EVENT_QP_FATAL);
                        return rv;
                }
                if (!srx->fpdu_part_rem) /* zero length SEND */
                        return 0;
        }
        /* place what is both outstanding for this FPDU and in the skb */
        data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
        rcvd_bytes = 0;

        /* A zero length SEND will skip below loop */
        while (data_bytes) {
                struct ib_pd *pd;
                struct siw_mem **mem, *mem_p;
                struct siw_sge *sge;
                u32 sge_bytes; /* data bytes avail for SGE */

                sge = &wqe->rqe.sge[frx->sge_idx];

                if (!sge->length) {
                        /* just skip empty sge's */
                        frx->sge_idx++;
                        frx->sge_off = 0;
                        frx->pbl_idx = 0;
                        continue;
                }
                /* limit this round to what is left in the current SGE */
                sge_bytes = min(data_bytes, sge->length - frx->sge_off);
                mem = &wqe->mem[frx->sge_idx];

                /*
                 * check with QP's PD if no SRQ present, SRQ's PD otherwise
                 */
                pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;

                rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
                                   frx->sge_off, sge_bytes);
                if (unlikely(rv)) {
                        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                           DDP_ETYPE_CATASTROPHIC,
                                           DDP_ECODE_CATASTROPHIC, 0);

                        siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
                        /* rv is evaluated by the final return below */
                        break;
                }
                mem_p = *mem;
                rv = siw_rx_data(mem_p, srx, &frx->pbl_idx,
                                 sge->laddr + frx->sge_off, sge_bytes);
                if (unlikely(rv != sge_bytes)) {
                        /* account for what was placed in earlier rounds */
                        wqe->processed += rcvd_bytes;

                        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                           DDP_ETYPE_CATASTROPHIC,
                                           DDP_ECODE_CATASTROPHIC, 0);
                        return -EINVAL;
                }
                frx->sge_off += rv;

                /* current SGE completely filled: advance to the next one */
                if (frx->sge_off == sge->length) {
                        frx->sge_idx++;
                        frx->sge_off = 0;
                        frx->pbl_idx = 0;
                }
                data_bytes -= rv;
                rcvd_bytes += rv;

                srx->fpdu_part_rem -= rv;
                srx->fpdu_part_rcvd += rv;
        }
        wqe->processed += rcvd_bytes;

        if (!srx->fpdu_part_rem)
                return 0;

        /* report a pending error, or ask to be called again */
        return (rv < 0) ? rv : -EAGAIN;
}

/*
 * siw_proc_write:
 *
 * Place incoming WRITE after referencing and checking target buffer

 * Function supports partially received WRITEs (suspending/resuming
 * current receive processing)
 *
 * return value:
 *      0:       reached the end of a DDP segment
 *      -EAGAIN: to be called again to finish the DDP segment
 */
int siw_proc_write(struct siw_qp *qp)
{
        struct siw_rx_stream *srx = &qp->rx_stream;
        struct siw_rx_fpdu *frx = &qp->rx_tagged;
        struct siw_mem *mem;
        int bytes, rv;

        if (srx->state == SIW_GET_DATA_START) {
                if (!srx->fpdu_part_rem) /* zero length WRITE */
                        return 0;

                rv = siw_write_check_ntoh(srx, frx);
                if (unlikely(rv)) {
                        siw_qp_event(qp, IB_EVENT_QP_FATAL);
                        return rv;
                }
        }
        bytes = min(srx->fpdu_part_rem, srx->skb_new);

        if (frx->first_ddp_seg) {
                struct siw_wqe *wqe = rx_wqe(frx);

                rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
                if (unlikely(!rx_mem(frx))) {
                        siw_dbg_qp(qp,
                                   "sink stag not found/invalid, stag 0x%08x\n",
                                   srx->ddp_stag);

                        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                           DDP_ETYPE_TAGGED_BUF,
                                           DDP_ECODE_T_INVALID_STAG, 0);
                        return -EINVAL;
                }
                wqe->rqe.num_sge = 1;
                rx_type(wqe) = SIW_OP_WRITE;
                wqe->wr_status = SIW_WR_INPROGRESS;
        }
        mem = rx_mem(frx);

        /*
         * Check if application re-registered memory with different
         * key field of STag.
         */
        if (unlikely(mem->stag != srx->ddp_stag)) {
                siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                   DDP_ETYPE_TAGGED_BUF,
                                   DDP_ECODE_T_INVALID_STAG, 0);
                return -EINVAL;
        }
        rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
                           IB_ACCESS_REMOTE_WRITE, bytes);
        if (unlikely(rv)) {
                siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                   DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
                                   0);

                siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

                return -EINVAL;
        }

        rv = siw_rx_data(mem, srx, &frx->pbl_idx,
                         srx->ddp_to + srx->fpdu_part_rcvd, bytes);
        if (unlikely(rv != bytes)) {
                siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                   DDP_ETYPE_CATASTROPHIC,
                                   DDP_ECODE_CATASTROPHIC, 0);
                return -EINVAL;
        }
        srx->fpdu_part_rem -= rv;
        srx->fpdu_part_rcvd += rv;

        if (!srx->fpdu_part_rem) {
                srx->ddp_to += srx->fpdu_part_rcvd;
                return 0;
        }
        return -EAGAIN;
}

/*
 * siw_proc_rreq:
 *
 * Inbound RREQ's cannot carry user data. Any payload remaining for
 * the current FPDU is reported and rejected.
 *
 * return value:
 *      0:       no payload present
 *      -EPROTO: RREQ with payload
 */
int siw_proc_rreq(struct siw_qp *qp)
{
        if (qp->rx_stream.fpdu_part_rem) {
                pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
                        be16_to_cpu(qp->rx_stream.hdr.ctrl.mpa_len));

                return -EPROTO;
        }
        return 0;
}

/*
 * siw_init_rresp:
 *
 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 * Put it at the tail of the IRQ, if there is another WQE currently in
 * transmit processing. If not, make it the current WQE to be processed
 * and schedule transmit processing.
 *
 * Can be called from softirq context and from process
 * context (RREAD socket loopback case!)
 *
 * return value:
 *      0:      success,
 *              failure code otherwise
 *
 * -EPROTO is returned if the MSN of the request does not match the
 * expected one, if the QP has no IRQ (irq_size is zero), or if no free
 * IRQ entry is available. If the response became the current TX WQE,
 * the result of siw_sq_start() is returned.
 */

static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
{
        struct siw_wqe *tx_work = tx_wqe(qp);
        struct siw_sqe *resp;

        /* READ request parameters, converted from wire format */
        uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
                 laddr = be64_to_cpu(srx->hdr.rreq.source_to);
        uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
                 lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
                 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
                 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);

        /* run_sq stays set only if the response becomes the current TX WQE */
        int run_sq = 1, rv = 0;
        unsigned long flags;

        if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
                siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                   DDP_ETYPE_UNTAGGED_BUF,
                                   DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
                return -EPROTO;
        }
        spin_lock_irqsave(&qp->sq_lock, flags);

        /* peer sent a READ request although no IRQ was set up */
        if (unlikely(!qp->attrs.irq_size)) {
                run_sq = 0;
                goto error_irq;
        }
        if (tx_work->wr_status == SIW_WR_IDLE) {
                /*
                 * immediately schedule READ response w/o
                 * consuming IRQ entry: IRQ must be empty.
                 */
                tx_work->processed = 0;
                tx_work->mem[0] = NULL;
                tx_work->wr_status = SIW_WR_QUEUED;
                resp = &tx_work->sqe;
        } else {
                /* transmit side is busy: queue the response in the IRQ */
                resp = irq_alloc_free(qp);
                run_sq = 0;
        }
        if (likely(resp)) {
                resp->opcode = SIW_OP_READ_RESPONSE;

                resp->sge[0].length = length;
                resp->sge[0].laddr = laddr;
                resp->sge[0].lkey = lkey;

                /* Keep aside message sequence number for potential
                 * error reporting during Read Response generation.
                 */
                resp->sge[1].length = msn;

                resp->raddr = raddr;
                resp->rkey = rkey;
                resp->num_sge = length ? 1 : 0;

                /* RRESP now valid as current TX wqe or placed into IRQ */
                smp_store_mb(resp->flags, SIW_WQE_VALID);
        } else {
                /* also entered by goto above, with sq_lock held */
error_irq:
                pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
                        qp_id(qp), qp->attrs.irq_size);

                siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
                                   RDMAP_ETYPE_REMOTE_OPERATION,
                                   RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
                rv = -EPROTO;
        }

        spin_unlock_irqrestore(&qp->sq_lock, flags);

        /* start transmit processing outside of the SQ lock */
        if (run_sq)
                rv = siw_sq_start(qp);

        return rv;
}

/*
 * Only called at start of Read.Response processing.
 * Transfer pending Read from tip of ORQ into current rx wqe,
 * but keep ORQ entry valid until Read.Response processing done.
 * No Queue locking needed.
 *
 * Returns 0 if a valid ORQ entry was transferred, -EPROTO if the QP
 * has no ORQ (orq_size is zero) or the current ORQ entry is not
 * flagged SIW_WQE_VALID.
 */
static int siw_orqe_start_rx(struct siw_qp *qp)
{
        struct siw_sqe *orqe;
        struct siw_wqe *wqe = NULL;

        if (unlikely(!qp->attrs.orq_size))
                return -EPROTO;

        /* make sure ORQ indices are current */
        smp_mb();

        orqe = orq_get_current(qp);
        if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
                /* RRESP is a TAGGED RDMAP operation */
                wqe = rx_wqe(&qp->rx_tagged);
                wqe->sqe.id = orqe->id;
                wqe->sqe.opcode = orqe->opcode;
                /* a READ has exactly one local SGE as target */
                wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
                wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
                wqe->sqe.sge[0].length = orqe->sge[0].length;
                wqe->sqe.flags = orqe->flags;
                wqe->sqe.num_sge = 1;
                /* total number of bytes expected for this READ */
                wqe->bytes = orqe->sge[0].length;
                wqe->processed = 0;
                wqe->mem[0] = NULL;
                /* make sure WQE is completely written before valid */
                smp_wmb();
                wqe->wr_status = SIW_WR_INPROGRESS;

                return 0;
        }
        return -EPROTO;
}

/*
 * siw_proc_rresp:
 *
 * Place incoming RRESP data into memory referenced by RREQ WQE
 * which is at the tip of the ORQ
 *
 * Function supports partially received RRESP's (suspending/resuming
 * current receive processing)
 *
 * return value:
 *      0:       reached the end of a DDP segment
 *      -EAGAIN: to be called again to finish the DDP segment
 *      -EPROTO: unexpected WQE state or no valid ORQ entry
 *      -EINVAL: target memory check or data placement failed
 *      result of siw_rresp_check_ntoh() if the header check fails
 */
int siw_proc_rresp(struct siw_qp *qp)
{
        struct siw_rx_stream *srx = &qp->rx_stream;
        struct siw_rx_fpdu *frx = &qp->rx_tagged;
        struct siw_wqe *wqe = rx_wqe(frx);
        struct siw_mem **mem, *mem_p;
        struct siw_sge *sge;
        int bytes, rv;

        if (frx->first_ddp_seg) {
                /* a new response must not find a WQE still in use */
                if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
                        pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
                                qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
                        rv = -EPROTO;
                        goto error_term;
                }
                /*
                 * fetch pending RREQ from orq
                 */
                rv = siw_orqe_start_rx(qp);
                if (rv) {
                        pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
                                qp_id(qp), qp->attrs.orq_size);
                        goto error_term;
                }
                /* on failure, the check already initialized a TERMINATE */
                rv = siw_rresp_check_ntoh(srx, frx);
                if (unlikely(rv)) {
                        siw_qp_event(qp, IB_EVENT_QP_FATAL);
                        return rv;
                }
        } else {
                /* resuming: the WQE must be the one started before */
                if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
                        pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
                                qp_id(qp), wqe->wr_status);
                        rv = -EPROTO;
                        goto error_term;
                }
        }
        if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
                return 0;

        sge = wqe->sqe.sge; /* there is only one */
        mem = &wqe->mem[0];

        if (!(*mem)) {
                /*
                 * check target memory which resolves memory on first fragment
                 */
                rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
                                   wqe->bytes);
                if (unlikely(rv)) {
                        siw_dbg_qp(qp, "target mem check: %d\n", rv);
                        wqe->wc_status = SIW_WC_LOC_PROT_ERR;

                        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
                                           DDP_ETYPE_TAGGED_BUF,
                                           siw_tagged_error(-rv), 0);

                        siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

                        return -EINVAL;
                }
        }
        mem_p = *mem;

        /* place what is both outstanding for this FPDU and in the skb */
        bytes = min(srx->fpdu_part_rem, srx->skb_new);
        rv = siw_rx_data(mem_p, srx, &frx->pbl_idx,
                         sge->laddr + wqe->processed, bytes);
        if (rv != bytes) {
                wqe->wc_status = SIW_WC_GENERAL_ERR;
                rv = -EINVAL;
                goto error_term;
        }
        srx->fpdu_part_rem -= rv;
        srx->fpdu_part_rcvd += rv;
        wqe->processed += rv;

        if (!srx->fpdu_part_rem) {
                /* segment complete: advance expected offset for the next one */
                srx->ddp_to += srx->fpdu_part_rcvd;
                return 0;
        }
        return -EAGAIN;

error_term:
        siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
                           DDP_ECODE_CATASTROPHIC, 0);
        return rv;
}

/*
 * Account for @length bytes consumed from the current skb: bump the
 * copied counter, shrink the count of new bytes and advance the read
 * offset.
 */
static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length)
{
        srx->skb_copied += length;
        srx->skb_new -= length;
        srx->skb_offset += length;
}

/*
 * siw_proc_terminate()
 *
 * Process an inbound TERMINATE message. The message is logged and its
 * DDP queue number, MSN and message offset are checked against the
 * values expected for the TERMINATE queue. If the M flag is set, the
 * header information carried behind the TERM header is pulled from the
 * current skb for debug reporting only. A TERM message spread over
 * several skbs is not reassembled.
 *
 * @qp:         QP the TERMINATE was received on
 *
 * Every path returns -ECONNRESET.
 */
int siw_proc_terminate(struct siw_qp *qp)
{
        struct siw_rx_stream *srx = &qp->rx_stream;
        struct iwarp_terminate *term = &srx->hdr.terminate;
        struct sk_buff *skb = srx->skb;
        union iwarp_hdr term_info;
        u8 *infop = (u8 *)&term_info;
        u16 to_copy = sizeof(struct iwarp_ctrl);
        enum rdma_opcode op;

        pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
                __rdmap_term_layer(term), __rdmap_term_etype(term),
                __rdmap_term_ecode(term));

        if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
            be32_to_cpu(term->ddp_msn) !=
                    srx->ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
            be32_to_cpu(term->ddp_mo) != 0) {
                pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
                        be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
                        be32_to_cpu(term->ddp_mo));
                return -ECONNRESET;
        }

        /*
         * Further pieces of the TERM are only looked at if the M flag
         * indicates them, and only if at least a minimum sized header
         * is present in this skb: no reassembly of a network
         * fragmented TERM message is attempted.
         */
        if (!term->flag_m ||
            srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
                return -ECONNRESET;

        memset(&term_info, 0, sizeof(term_info));

        /* Start with the control part; it carries the opcode. */
        skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

        op = __rdmap_get_opcode(&term_info.ctrl);
        if (op < RDMAP_TERMINATE) {
                /* Control part consumed, continue behind it */
                infop += to_copy;
                siw_update_skb_rcvd(srx, to_copy);
                srx->fpdu_part_rcvd += to_copy;
                srx->fpdu_part_rem -= to_copy;

                /* Remainder of the header for the reported opcode */
                to_copy = iwarp_pktinfo[op].hdr_len - to_copy;

                /* Again, no network fragmented TERM's */
                if (to_copy + MPA_CRC_SIZE > srx->skb_new)
                        return -ECONNRESET;

                skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

                if (term->flag_r) {
                        siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
                                   op, be16_to_cpu(term_info.ctrl.mpa_len),
                                   term->flag_m ? "valid" : "invalid");
                } else if (term->flag_d) {
                        siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
                                   op, be16_to_cpu(term_info.ctrl.mpa_len),
                                   term->flag_m ? "valid" : "invalid");
                }
        }

        /* Account for the bytes of the most recent copy */
        siw_update_skb_rcvd(srx, to_copy);
        srx->fpdu_part_rcvd += to_copy;
        srx->fpdu_part_rem -= to_copy;

        return -ECONNRESET;
}

/*
 * siw_get_trailer()
 *
 * Receive the FPDU trailer (any pad bytes followed by the CRC) and,
 * if CRC checking is enabled, verify the received CRC against the one
 * computed over the FPDU.
 *
 * The trailer may arrive in several pieces. Progress is kept in
 * srx->fpdu_part_rcvd, which the caller resets to zero when it enters
 * the trailer state, so each piece is appended behind the bytes that
 * are already buffered instead of overwriting them.
 *
 * @qp:         QP the stream belongs to
 * @srx:        Receive Context
 *
 * Returns 0 when the trailer is complete (and the CRC matches, if
 * enabled), -EAGAIN if more trailer bytes are needed, -EINVAL on CRC
 * mismatch (a TERMINATE is initialized in that case).
 */
static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
{
        struct sk_buff *skb = srx->skb;
        int avail = min(srx->skb_new, srx->fpdu_part_rem);
        /*
         * Start of the trailer buffer: srx->pad bytes in front of the
         * CRC field (presumably the pad area of srx->trailer).
         */
        u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
        __wsum crc_in, crc_own = 0;

        siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
                   srx->fpdu_part_rem, srx->skb_new, srx->pad);

        /* Append behind what previous calls already received */
        skb_copy_bits(skb, srx->skb_offset, tbuf + srx->fpdu_part_rcvd,
                      avail);

        siw_update_skb_rcvd(srx, avail);
        srx->fpdu_part_rcvd += avail;
        srx->fpdu_part_rem -= avail;

        if (srx->fpdu_part_rem)
                return -EAGAIN;

        if (!srx->mpa_crc_enabled)
                return 0;

        /* Pad bytes are covered by the CRC */
        if (srx->pad)
                siw_crc_update(&srx->mpa_crc, tbuf, srx->pad);
        /*
         * CRC32 is computed, transmitted and received directly in NBO,
         * so there's never a reason to convert byte order.
         */
        siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own);
        crc_in = (__force __wsum)srx->trailer.crc;

        if (unlikely(crc_in != crc_own)) {
                pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
                        crc_in, crc_own, qp->rx_stream.rdmap_op);

                siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
                                   LLP_ETYPE_MPA,
                                   LLP_ECODE_RECEIVED_CRC, 0);
                return -EINVAL;
        }
        return 0;
}

#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)

static int siw_get_hdr(struct siw_rx_stream *srx)
{
        struct sk_buff *skb = srx->skb;
        struct siw_qp *qp = rx_qp(srx);
        struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
        struct siw_rx_fpdu *frx;
        u8 opcode;
        int bytes;

        if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
                /*
                 * copy a mimimum sized (tagged) DDP frame control part
                 */
                bytes = min_t(int, srx->skb_new,
                              MIN_DDP_HDR - srx->fpdu_part_rcvd);

                skb_copy_bits(skb, srx->skb_offset,
                              (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

                siw_update_skb_rcvd(srx, bytes);
                srx->fpdu_part_rcvd += bytes;
                if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
                        return -EAGAIN;

                if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
                        enum ddp_etype etype;
                        enum ddp_ecode ecode;

                        pr_warn("siw: received ddp version unsupported %d\n",
                                __ddp_get_version(c_hdr));

                        if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
                                etype = DDP_ETYPE_TAGGED_BUF;
                                ecode = DDP_ECODE_T_VERSION;
                        } else {
                                etype = DDP_ETYPE_UNTAGGED_BUF;
                                ecode = DDP_ECODE_UT_VERSION;
                        }
                        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
                                           etype, ecode, 0);
                        return -EINVAL;
                }
                if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
                        pr_warn("siw: received rdmap version unsupported %d\n",
                                __rdmap_get_version(c_hdr));

                        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
                                           RDMAP_ETYPE_REMOTE_OPERATION,
                                           RDMAP_ECODE_VERSION, 0);
                        return -EINVAL;
                }
                opcode = __rdmap_get_opcode(c_hdr);

                if (opcode > RDMAP_TERMINATE) {
                        pr_warn("siw: received unknown packet type %u\n",
                                opcode);

                        siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
                                           RDMAP_ETYPE_REMOTE_OPERATION,
                                           RDMAP_ECODE_OPCODE, 0);
                        return -EINVAL;
                }
                siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
        } else {
                opcode = __rdmap_get_opcode(c_hdr);
        }
        set_rx_fpdu_context(qp, opcode);
        frx = qp->rx_fpdu;

        /*
         * Figure out len of current hdr: variable length of
         * iwarp hdr may force us to copy hdr information in
         * two steps. Only tagged DDP messages are already
         * completely received.
         */
        if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
                int hdrlen = iwarp_pktinfo[opcode].hdr_len;

                bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new);

                skb_copy_bits(skb, srx->skb_offset,
                              (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

                siw_update_skb_rcvd(srx, bytes);
                srx->fpdu_part_rcvd += bytes;
                if (srx->fpdu_part_rcvd < hdrlen)
                        return -EAGAIN;
        }

        /*
         * DDP/RDMAP header receive completed. Check if the current
         * DDP segment starts a new RDMAP message or continues a previously
         * started RDMAP message.
         *
         * Alternating reception of DDP segments (or FPDUs) from incomplete
         * tagged and untagged RDMAP messages is supported, as long as
         * the current tagged or untagged message gets eventually completed
         * w/o intersection from another message of the same type
         * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
         * but not by a READ RESPONSE etc.
         */
        if (srx->mpa_crc_enabled) {
                /*
                 * Restart CRC computation
                 */
                siw_crc_init(&srx->mpa_crc);
                siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd);
        }
        if (frx->more_ddp_segs) {
                frx->first_ddp_seg = 0;
                if (frx->prev_rdmap_op != opcode) {
                        pr_warn("siw: packet intersection: %u : %u\n",
                                frx->prev_rdmap_op, opcode);
                        /*
                         * The last inbound RDMA operation of same type
                         * (tagged or untagged) is left unfinished.
                         * To complete it in error, make it the current
                         * operation again, even with the header already
                         * overwritten. For error handling, only the opcode
                         * and current rx context are relevant.
                         */
                        set_rx_fpdu_context(qp, frx->prev_rdmap_op);
                        __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
                        return -EPROTO;
                }
        } else {
                frx->prev_rdmap_op = opcode;
                frx->first_ddp_seg = 1;
        }
        frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;

        return 0;
}

/*
 * siw_check_tx_fence()
 *
 * Called after an RDMA READ has completed: release the current ORQ
 * entry and, if send queue processing is fenced, check whether it can
 * be resumed.
 *
 * @qp:         QP to check
 *
 * Returns -EPROTO if the waiting work request is not in queued state
 * or no free ORQ entry is found for a waiting READ. If SQ processing
 * gets resumed, the result of siw_sq_start() is returned. Otherwise 0.
 */
static int siw_check_tx_fence(struct siw_qp *qp)
{
        struct siw_wqe *pending = tx_wqe(qp);
        struct siw_sqe *orqe;
        unsigned long flags;
        int restart_sq = 0;
        int rv = 0;

        spin_lock_irqsave(&qp->orq_lock, flags);

        /* Release the ORQ entry of the READ just completed */
        orqe = orq_get_current(qp);
        WRITE_ONCE(orqe->flags, 0);

        qp->orq_get++;

        /* Nothing is waiting on the ORQ */
        if (!qp->tx_ctx.orq_fence)
                goto unlock;

        if (unlikely(pending->wr_status != SIW_WR_QUEUED)) {
                pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
                        qp_id(qp), pending->wr_status);
                rv = -EPROTO;
                goto unlock;
        }

        /* resume SQ processing, if possible */
        if (pending->sqe.opcode == SIW_OP_READ ||
            pending->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
                /*
                 * SQ processing was stopped because of a full ORQ.
                 * One entry just got released, so place the READ there.
                 */
                orqe = orq_get_free(qp);
                if (unlikely(!orqe)) {
                        pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
                        rv = -EPROTO;
                        goto unlock;
                }
                siw_read_to_orq(orqe, &pending->sqe);

                qp->orq_put++;
                qp->tx_ctx.orq_fence = 0;
                restart_sq = 1;
        } else if (siw_orq_empty(qp)) {
                /*
                 * SQ processing was stopped by fenced work request.
                 * Resume since all previous Read's are now completed.
                 */
                qp->tx_ctx.orq_fence = 0;
                restart_sq = 1;
        }

unlock:
        spin_unlock_irqrestore(&qp->orq_lock, flags);

        /* SQ processing is started with the ORQ lock released */
        if (restart_sq)
                rv = siw_sq_start(qp);

        return rv;
}

/*
 * siw_rdmap_complete()
 *
 * Complete processing of an RDMA message after receiving all
 * DDP segments, or abort processing after encountering an error.
 * The opcode is taken from the header currently held in the
 * receive context.
 *
 *   o SENDs + RRESPs need work completion generation,
 *   o RREQs need READ RESPONSE initialization,
 *   o WRITEs need memory dereferencing.
 *
 * @qp:         QP the message was received on
 * @error:      0 for regular completion, otherwise the negative error
 *              code that stopped receive processing
 *
 * Returns 0, or the result of the last completion/resume helper called
 * for the opcode. On every path the rx WQE is set to SIW_WR_IDLE.
 *
 * TODO: Failed WRITEs need local error to be surfaced.
 */
static int siw_rdmap_complete(struct siw_qp *qp, int error)
{
        struct siw_rx_stream *srx = &qp->rx_stream;
        struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
        enum siw_wc_status wc_status = wqe->wc_status;
        u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
        int rv = 0;

        switch (opcode) {
        case RDMAP_SEND_SE:
        case RDMAP_SEND_SE_INVAL:
                /* Solicited variants only differ by flagging the RQE */
                wqe->rqe.flags |= SIW_WQE_SOLICITED;
                fallthrough;

        case RDMAP_SEND:
        case RDMAP_SEND_INVAL:
                /* No receive WQE in use: nothing to complete */
                if (wqe->wr_status == SIW_WR_IDLE)
                        break;

                srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;

                /* An error without specific status completes as general error */
                if (error != 0 && wc_status == SIW_WC_SUCCESS)
                        wc_status = SIW_WC_GENERAL_ERR;
                /*
                 * Handle STag invalidation request
                 */
                if (wc_status == SIW_WC_SUCCESS &&
                    (opcode == RDMAP_SEND_INVAL ||
                     opcode == RDMAP_SEND_SE_INVAL)) {
                        rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
                        if (rv) {
                                siw_init_terminate(
                                        qp, TERM_ERROR_LAYER_RDMAP,
                                        rv == -EACCES ?
                                                RDMAP_ETYPE_REMOTE_PROTECTION :
                                                RDMAP_ETYPE_REMOTE_OPERATION,
                                        RDMAP_ECODE_CANNOT_INVALIDATE, 0);

                                wc_status = SIW_WC_REM_INV_REQ_ERR;
                        }
                        /*
                         * The invalidated STag is passed on only if
                         * invalidation succeeded. rv is then replaced
                         * by the completion result.
                         */
                        rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
                                              rv ? 0 : srx->inval_stag,
                                              wc_status);
                } else {
                        rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
                                              0, wc_status);
                }
                siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
                break;

        case RDMAP_RDMA_READ_RESP:
                /* No READ WQE in use: nothing to complete */
                if (wqe->wr_status == SIW_WR_IDLE)
                        break;

                if (error != 0) {
                        if ((srx->state == SIW_GET_HDR &&
                             qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
                                /* possible RREQ in ORQ left untouched */
                                break;

                        if (wc_status == SIW_WC_SUCCESS)
                                wc_status = SIW_WC_GENERAL_ERR;
                } else if (rdma_is_kernel_res(&qp->base_qp.res) &&
                           rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
                        /*
                         * Handle any STag invalidation request
                         */
                        rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
                        if (rv) {
                                siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
                                                   RDMAP_ETYPE_CATASTROPHIC,
                                                   RDMAP_ECODE_UNSPECIFIED, 0);

                                /*
                                 * Failed invalidation turns the so far
                                 * successful READ into an error case.
                                 */
                                if (wc_status == SIW_WC_SUCCESS) {
                                        wc_status = SIW_WC_GENERAL_ERR;
                                        error = rv;
                                }
                        }
                }
                /*
                 * All errors turn the wqe into signalled.
                 */
                if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
                        rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
                                              wc_status);
                siw_wqe_put_mem(wqe, SIW_OP_READ);

                if (!error) {
                        /*
                         * Releases the ORQ entry and may resume SQ
                         * processing. Note that rv from the completion
                         * above is replaced here.
                         */
                        rv = siw_check_tx_fence(qp);
                } else {
                        /* Disable current ORQ element */
                        if (qp->attrs.orq_size)
                                WRITE_ONCE(orq_get_current(qp)->flags, 0);
                }
                break;

        case RDMAP_RDMA_READ_REQ:
                /* Set up the READ RESPONSE; the MSN advances on success only */
                if (!error) {
                        rv = siw_init_rresp(qp, srx);
                        srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
                }
                break;

        case RDMAP_RDMA_WRITE:
                if (wqe->wr_status == SIW_WR_IDLE)
                        break;

                /*
                 * Free References from memory object if
                 * attached to receive context (inbound WRITE).
                 * While a zero-length WRITE is allowed,
                 * no memory reference got created.
                 */
                if (rx_mem(&qp->rx_tagged)) {
                        siw_mem_put(rx_mem(&qp->rx_tagged));
                        rx_mem(&qp->rx_tagged) = NULL;
                }
                break;

        default:
                break;
        }
        /* The rx WQE is free for the next message of this type */
        wqe->wr_status = SIW_WR_IDLE;

        return rv;
}

/*
 * siw_tcp_rx_data()
 *
 * Main routine to consume inbound TCP payload. Drives the per-QP
 * receive state machine (header -> data -> trailer) over the bytes
 * of @skb until the skb is exhausted, an FPDU fragment is incomplete,
 * receive processing is suspended, or an error occurs. On error, the
 * current message gets completed in error where applicable and the
 * connection is dropped.
 *
 * @rd_desc:    read descriptor, rd_desc->arg.data references the QP
 * @skb:        socket buffer
 * @off:        offset in skb
 * @len:        skb->len - offset : payload in skb (not read here, the
 *              amount is derived from skb->len and @off)
 *
 * Returns the number of bytes accounted as consumed (srx->skb_copied).
 */
int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
                    unsigned int off, size_t len)
{
        struct siw_qp *qp = rd_desc->arg.data;
        struct siw_rx_stream *srx = &qp->rx_stream;
        int rv;

        /* Attach the new skb to the receive context */
        srx->skb = skb;
        srx->skb_new = skb->len - off;
        srx->skb_offset = off;
        srx->skb_copied = 0;

        siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);

        while (srx->skb_new) {
                /*
                 * Set to 0 where the current message must not be
                 * completed in the common error path below.
                 */
                int run_completion = 1;

                if (unlikely(srx->rx_suspend)) {
                        /* Do not process any more data */
                        srx->skb_copied += srx->skb_new;
                        break;
                }
                switch (srx->state) {
                case SIW_GET_HDR:
                        rv = siw_get_hdr(srx);
                        if (!rv) {
                                /*
                                 * Header complete: what remains of the
                                 * FPDU up to the trailer is payload.
                                 */
                                srx->fpdu_part_rem =
                                        be16_to_cpu(srx->hdr.ctrl.mpa_len) -
                                        srx->fpdu_part_rcvd + MPA_HDR_SIZE;

                                /* Bytes needed to reach 4 byte alignment */
                                if (srx->fpdu_part_rem)
                                        srx->pad = -srx->fpdu_part_rem & 0x3;
                                else
                                        srx->pad = 0;

                                srx->state = SIW_GET_DATA_START;
                                srx->fpdu_part_rcvd = 0;
                        }
                        break;

                case SIW_GET_DATA_MORE:
                        /*
                         * Another data fragment of the same DDP segment.
                         * Setting first_ddp_seg = 0 avoids repeating
                         * initializations that shall occur only once per
                         * DDP segment.
                         */
                        qp->rx_fpdu->first_ddp_seg = 0;
                        fallthrough;

                case SIW_GET_DATA_START:
                        /*
                         * Headers will be checked by the opcode-specific
                         * data receive function below.
                         */
                        rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
                        if (!rv) {
                                int mpa_len =
                                        be16_to_cpu(srx->hdr.ctrl.mpa_len)
                                        + MPA_HDR_SIZE;

                                /* Payload done: expect padding plus CRC */
                                srx->fpdu_part_rem = (-mpa_len & 0x3)
                                                      + MPA_CRC_SIZE;
                                srx->fpdu_part_rcvd = 0;
                                srx->state = SIW_GET_TRAILER;
                        } else {
                                /*
                                 * -ECONNRESET skips message completion;
                                 * any other result (including -EAGAIN)
                                 * moves on to SIW_GET_DATA_MORE.
                                 */
                                if (unlikely(rv == -ECONNRESET))
                                        run_completion = 0;
                                else
                                        srx->state = SIW_GET_DATA_MORE;
                        }
                        break;

                case SIW_GET_TRAILER:
                        /*
                         * read CRC + any padding
                         */
                        rv = siw_get_trailer(qp, srx);
                        if (likely(!rv)) {
                                /*
                                 * FPDU completed.
                                 * complete RDMAP message if last fragment
                                 */
                                srx->state = SIW_GET_HDR;
                                srx->fpdu_part_rcvd = 0;

                                if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
                                      DDP_FLAG_LAST))
                                        /* more frags */
                                        break;

                                rv = siw_rdmap_complete(qp, 0);
                                /* Already completed, do not repeat on error */
                                run_completion = 0;
                        }
                        break;

                default:
                        pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
                        rv = -EPROTO;
                        run_completion = 0;
                }
                if (unlikely(rv != 0 && rv != -EAGAIN)) {
                        /*
                         * Hard error. Complete the current message in
                         * error if one is in progress: either past the
                         * header state, or more DDP segments pending.
                         */
                        if ((srx->state > SIW_GET_HDR ||
                             (qp->rx_fpdu && qp->rx_fpdu->more_ddp_segs)) &&
                            run_completion)
                                siw_rdmap_complete(qp, rv);

                        siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
                                   srx->state);

                        siw_qp_cm_drop(qp, 1);

                        break;
                }
                if (rv) {
                        /* -EAGAIN: stop here until more data arrives */
                        siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
                                   srx->state, srx->fpdu_part_rem);
                        break;
                }
        }
        return srx->skb_copied;
}