root/drivers/target/iscsi/cxgbit/cxgbit_cm.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 Chelsio Communications, Inc.
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>

#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>

#include <libcxgb_cm.h>
#include "cxgbit.h"
#include "clip_tbl.h"

/*
 * Prepare a work-request wait object for a new request: clear the stored
 * result and re-arm the completion so that a later wait blocks until the
 * next complete() on it.
 */
static void cxgbit_init_wr_wait(struct cxgbit_wr_wait *wr_waitp)
{
        wr_waitp->ret = 0;
        reinit_completion(&wr_waitp->completion);
}

/*
 * Record the outcome of a work request and wake whoever waits on it.
 *
 * @wr_waitp: wait object to complete
 * @func:     caller name, used only in the error log
 * @ret:      CPL status code; CPL_ERR_NONE is stored as 0, anything else
 *            is stored as -EIO and logged
 */
static void
cxgbit_wake_up(struct cxgbit_wr_wait *wr_waitp, const char *func, u8 ret)
{
        if (ret == CPL_ERR_NONE)
                wr_waitp->ret = 0;
        else
                wr_waitp->ret = -EIO;

        /* Newline-terminate so the record is not merged with the next one. */
        if (wr_waitp->ret)
                pr_err("%s: err:%u\n", func, ret);

        complete(&wr_waitp->completion);
}

/*
 * Wait for the reply to a previously issued work request.
 *
 * @cdev:     device the request was sent on
 * @wr_waitp: wait object armed with cxgbit_init_wr_wait()
 * @tid:      TID reported in log messages
 * @timeout:  maximum wait in seconds
 * @func:     caller name for the timeout log message
 *
 * Returns the result stored in @wr_waitp: -EIO if the device is not up
 * (no wait is attempted), -ETIMEDOUT if no completion arrived in time,
 * otherwise whatever the completer stored.
 */
static int
cxgbit_wait_for_reply(struct cxgbit_device *cdev,
                      struct cxgbit_wr_wait *wr_waitp, u32 tid, u32 timeout,
                      const char *func)
{
        /* wait_for_completion_timeout() returns the remaining jiffies. */
        unsigned long time_left;

        if (!test_bit(CDEV_STATE_UP, &cdev->flags)) {
                wr_waitp->ret = -EIO;
                goto out;
        }

        time_left = wait_for_completion_timeout(&wr_waitp->completion,
                                                timeout * HZ);
        if (!time_left) {
                pr_info("%s - Device %s not responding tid %u\n",
                        func, pci_name(cdev->lldi.pdev), tid);
                wr_waitp->ret = -ETIMEDOUT;
        }
out:
        if (wr_waitp->ret)
                pr_info("%s: FW reply %d tid %u\n",
                        pci_name(cdev->lldi.pdev), wr_waitp->ret, tid);
        return wr_waitp->ret;
}

/*
 * Map a cnp pointer to a bucket index of the per-device np hash table.
 * The low 10 address bits are discarded before masking.
 * NOTE(review): the mask form assumes NP_INFO_HASH_SIZE is a power of
 * two - confirm against its definition.
 */
static int cxgbit_np_hashfn(const struct cxgbit_np *cnp)
{
        unsigned long addr = (unsigned long)cnp;

        return (addr >> 10) & (NP_INFO_HASH_SIZE - 1);
}

static struct np_info *
cxgbit_np_hash_add(struct cxgbit_device *cdev, struct cxgbit_np *cnp,
                   unsigned int stid)
{
        struct np_info *p = kzalloc_obj(*p);

        if (p) {
                int bucket = cxgbit_np_hashfn(cnp);

                p->cnp = cnp;
                p->stid = stid;
                spin_lock(&cdev->np_lock);
                p->next = cdev->np_hash_tab[bucket];
                cdev->np_hash_tab[bucket] = p;
                spin_unlock(&cdev->np_lock);
        }

        return p;
}

/*
 * Look up the server TID registered for @cnp on @cdev.
 *
 * Returns the stid, or -1 when @cnp has no record on this device.
 */
static int
cxgbit_np_hash_find(struct cxgbit_device *cdev, struct cxgbit_np *cnp)
{
        int idx = cxgbit_np_hashfn(cnp);
        struct np_info *it;
        int found = -1;

        spin_lock(&cdev->np_lock);
        it = cdev->np_hash_tab[idx];
        while (it && it->cnp != cnp)
                it = it->next;
        if (it)
                found = it->stid;
        spin_unlock(&cdev->np_lock);

        return found;
}

static int cxgbit_np_hash_del(struct cxgbit_device *cdev, struct cxgbit_np *cnp)
{
        int stid = -1, bucket = cxgbit_np_hashfn(cnp);
        struct np_info *p, **prev = &cdev->np_hash_tab[bucket];

        spin_lock(&cdev->np_lock);
        for (p = *prev; p; prev = &p->next, p = p->next) {
                if (p->cnp == cnp) {
                        stid = p->stid;
                        *prev = p->next;
                        kfree(p);
                        break;
                }
        }
        spin_unlock(&cdev->np_lock);

        return stid;
}

/* kref release callback: free the cxgbit_np that embeds @kref. */
void _cxgbit_free_cnp(struct kref *kref)
{
        kfree(container_of(kref, struct cxgbit_np, kref));
}

static int
cxgbit_create_server6(struct cxgbit_device *cdev, unsigned int stid,
                      struct cxgbit_np *cnp)
{
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
                                     &cnp->com.local_addr;
        int addr_type;
        int ret;

        pr_debug("%s: dev = %s; stid = %u; sin6_port = %u\n",
                 __func__, cdev->lldi.ports[0]->name, stid, sin6->sin6_port);

        addr_type = ipv6_addr_type((const struct in6_addr *)
                                   &sin6->sin6_addr);
        if (addr_type != IPV6_ADDR_ANY) {
                ret = cxgb4_clip_get(cdev->lldi.ports[0],
                                     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
                if (ret) {
                        pr_err("Unable to find clip table entry. laddr %pI6. Error:%d.\n",
                               sin6->sin6_addr.s6_addr, ret);
                        return -ENOMEM;
                }
        }

        cxgbit_get_cnp(cnp);
        cxgbit_init_wr_wait(&cnp->com.wr_wait);

        ret = cxgb4_create_server6(cdev->lldi.ports[0],
                                   stid, &sin6->sin6_addr,
                                   sin6->sin6_port,
                                   cdev->lldi.rxq_ids[0]);
        if (!ret)
                ret = cxgbit_wait_for_reply(cdev, &cnp->com.wr_wait,
                                            0, 10, __func__);
        else if (ret > 0)
                ret = net_xmit_errno(ret);
        else
                cxgbit_put_cnp(cnp);

        if (ret) {
                if (ret != -ETIMEDOUT)
                        cxgb4_clip_release(cdev->lldi.ports[0],
                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);

                pr_err("create server6 err %d stid %d laddr %pI6 lport %d\n",
                       ret, stid, sin6->sin6_addr.s6_addr,
                       ntohs(sin6->sin6_port));
        }

        return ret;
}

/*
 * Start an IPv4 hardware listener for @cnp on @cdev using server TID @stid.
 *
 * A reference on @cnp is taken before the request is issued, and on a
 * successful submit the function waits up to 10 seconds for the reply.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cxgbit_create_server4(struct cxgbit_device *cdev, unsigned int stid,
                      struct cxgbit_np *cnp)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)
                                   &cnp->com.local_addr;
        int ret;

        /* sin_port is big-endian; convert for display like the error path. */
        pr_debug("%s: dev = %s; stid = %u; sin_port = %u\n",
                 __func__, cdev->lldi.ports[0]->name, stid,
                 ntohs(sin->sin_port));

        cxgbit_get_cnp(cnp);
        cxgbit_init_wr_wait(&cnp->com.wr_wait);

        ret = cxgb4_create_server(cdev->lldi.ports[0],
                                  stid, sin->sin_addr.s_addr,
                                  sin->sin_port, 0,
                                  cdev->lldi.rxq_ids[0]);
        /*
         * NOTE(review): the reference taken above is dropped here only for
         * a negative return; the other paths presumably rely on the reply
         * handler - confirm.
         */
        if (!ret)
                ret = cxgbit_wait_for_reply(cdev,
                                            &cnp->com.wr_wait,
                                            0, 10, __func__);
        else if (ret > 0)
                ret = net_xmit_errno(ret);
        else
                cxgbit_put_cnp(cnp);

        if (ret)
                pr_err("create server failed err %d stid %d laddr %pI4 lport %d\n",
                       ret, stid, &sin->sin_addr, ntohs(sin->sin_port));
        return ret;
}

/*
 * Find the cxgbit device that owns net device @ndev.
 *
 * Walks cdev_list_head and compares @ndev against each device's ports.
 * When a match is found and @port_id is non-NULL, the matching port index
 * is stored through it.
 *
 * Returns the owning device, or NULL if no port matches.
 */
struct cxgbit_device *cxgbit_find_device(struct net_device *ndev, u8 *port_id)
{
        struct cxgbit_device *cdev;
        u8 port;

        list_for_each_entry(cdev, &cdev_list_head, list) {
                struct cxgb4_lld_info *lldi = &cdev->lldi;

                for (port = 0; port < lldi->nports; port++) {
                        if (lldi->ports[port] != ndev)
                                continue;

                        if (port_id)
                                *port_id = port;
                        return cdev;
                }
        }

        return NULL;
}

/*
 * Resolve @ndev to the underlying device: bonding interfaces are rejected
 * (NULL, with an error log), a VLAN device yields its real device, and any
 * other device is returned unchanged.
 */
static struct net_device *cxgbit_get_real_dev(struct net_device *ndev)
{
        if (ndev->priv_flags & IFF_BONDING) {
                pr_err("Bond devices are not supported. Interface:%s\n",
                       ndev->name);
                return NULL;
        }

        return is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
}

/*
 * Find the real net device in init_net that owns IPv4 address @saddr.
 * Returns NULL when no device has the address or the device is unsupported.
 */
static struct net_device *cxgbit_ipv4_netdev(__be32 saddr)
{
        struct net_device *ndev = __ip_dev_find(&init_net, saddr, false);

        return ndev ? cxgbit_get_real_dev(ndev) : NULL;
}

static struct net_device *cxgbit_ipv6_netdev(struct in6_addr *addr6)
{
        struct net_device *ndev = NULL;
        bool found = false;

        if (IS_ENABLED(CONFIG_IPV6)) {
                for_each_netdev_rcu(&init_net, ndev)
                        if (ipv6_chk_addr(&init_net, addr6, ndev, 1)) {
                                found = true;
                                break;
                        }
        }
        if (!found)
                return NULL;
        return cxgbit_get_real_dev(ndev);
}

/*
 * Find the cxgbit device whose port carries the local address of @cnp.
 *
 * The net device lookup and the device match both run inside one RCU
 * read-side section. Returns NULL for an unknown address family, an
 * address that no device owns, or a device that is not a cxgbit port.
 */
static struct cxgbit_device *cxgbit_find_np_cdev(struct cxgbit_np *cnp)
{
        struct sockaddr_storage *ss = &cnp->com.local_addr;
        struct cxgbit_device *cdev = NULL;
        struct net_device *ndev = NULL;

        rcu_read_lock();
        switch (ss->ss_family) {
        case AF_INET: {
                struct sockaddr_in *sin = (struct sockaddr_in *)ss;

                ndev = cxgbit_ipv4_netdev(sin->sin_addr.s_addr);
                break;
        }
        case AF_INET6: {
                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

                ndev = cxgbit_ipv6_netdev(&sin6->sin6_addr);
                break;
        }
        default:
                break;
        }

        if (ndev)
                cdev = cxgbit_find_device(ndev, NULL);
        rcu_read_unlock();

        return cdev;
}

/*
 * Tell whether the local address of @cnp is the wildcard address
 * (INADDR_ANY for IPv4, IPV6_ADDR_ANY for IPv6). Any other address
 * family yields false.
 */
static bool cxgbit_inaddr_any(struct cxgbit_np *cnp)
{
        struct sockaddr_storage *ss = &cnp->com.local_addr;

        switch (ss->ss_family) {
        case AF_INET: {
                struct sockaddr_in *sin = (struct sockaddr_in *)ss;

                return sin->sin_addr.s_addr == htonl(INADDR_ANY);
        }
        case AF_INET6: {
                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
                int addr_type;

                addr_type = ipv6_addr_type((const struct in6_addr *)
                                           &sin6->sin6_addr);
                return addr_type == IPV6_ADDR_ANY;
        }
        default:
                return false;
        }
}

/*
 * Set up a listener for @cnp on one device: allocate a server TID, record
 * it in the device's np hash and ask the hardware to create the server.
 *
 * Returns 0 on success. Returns -EINVAL if the device is not up, no stid
 * is available or the hash record cannot be allocated; otherwise the error
 * of the create-server call. On a create-server failure the hash record is
 * removed, and the stid is freed unless the error is -ETIMEDOUT.
 */
static int
__cxgbit_setup_cdev_np(struct cxgbit_device *cdev, struct cxgbit_np *cnp)
{
        int ss_family = cnp->com.local_addr.ss_family;
        int stid, ret;

        if (!test_bit(CDEV_STATE_UP, &cdev->flags))
                return -EINVAL;

        stid = cxgb4_alloc_stid(cdev->lldi.tids, ss_family, cnp);
        if (stid < 0)
                return -EINVAL;

        if (!cxgbit_np_hash_add(cdev, cnp, stid)) {
                cxgb4_free_stid(cdev->lldi.tids, stid, ss_family);
                return -EINVAL;
        }

        ret = (ss_family == AF_INET) ?
                cxgbit_create_server4(cdev, stid, cnp) :
                cxgbit_create_server6(cdev, stid, cnp);
        if (!ret)
                return 0;

        if (ret != -ETIMEDOUT)
                cxgb4_free_stid(cdev->lldi.tids, stid, ss_family);
        cxgbit_np_hash_del(cdev, cnp);

        return ret;
}

/*
 * Set up a listener for @cnp on the single device that owns its local
 * address, holding cdev_list_lock throughout.
 *
 * On success the device is stored in cnp->com.cdev and 0 is returned.
 * Returns -1 if no device matches, @cnp is already registered on it, or
 * the per-device setup fails.
 */
static int cxgbit_setup_cdev_np(struct cxgbit_np *cnp)
{
        struct cxgbit_device *cdev;
        int ret = -1;

        mutex_lock(&cdev_list_lock);
        cdev = cxgbit_find_np_cdev(cnp);
        if (cdev && cxgbit_np_hash_find(cdev, cnp) < 0 &&
            !__cxgbit_setup_cdev_np(cdev, cnp)) {
                cnp->com.cdev = cdev;
                ret = 0;
        }
        mutex_unlock(&cdev_list_lock);

        return ret;
}

/*
 * Set up a listener for @cnp on every registered device (wildcard bind),
 * holding cdev_list_lock throughout.
 *
 * Fails with -1 without touching any device if @cnp is already registered
 * on one of them. Otherwise each device is tried in turn; a timeout stops
 * the walk, other failures are skipped. Returns 0 if at least one device
 * was set up, -1 otherwise.
 */
static int cxgbit_setup_all_np(struct cxgbit_np *cnp)
{
        struct cxgbit_device *cdev;
        int status = -1;
        u32 nr_ok = 0;

        mutex_lock(&cdev_list_lock);
        list_for_each_entry(cdev, &cdev_list_head, list) {
                if (cxgbit_np_hash_find(cdev, cnp) >= 0)
                        goto unlock;
        }

        list_for_each_entry(cdev, &cdev_list_head, list) {
                int ret = __cxgbit_setup_cdev_np(cdev, cnp);

                if (ret == -ETIMEDOUT)
                        break;
                if (!ret)
                        nr_ok++;
        }

        if (nr_ok)
                status = 0;
unlock:
        mutex_unlock(&cdev_list_lock);
        return status;
}

/*
 * Create the offload listener state for network portal @np bound to
 * @ksockaddr.
 *
 * Allocates and initialises a cxgbit_np, copies the address into both @np
 * and the new object, then sets the listener up on all devices (wildcard
 * address) or on the one device owning the address.
 *
 * Returns 0 on success with np->np_context pointing at the new object,
 * -EINVAL for an unsupported address family or a failed setup, -ENOMEM if
 * the allocation fails.
 */
int cxgbit_setup_np(struct iscsi_np *np, struct sockaddr_storage *ksockaddr)
{
        struct cxgbit_np *cnp;
        int ret;

        switch (ksockaddr->ss_family) {
        case AF_INET:
        case AF_INET6:
                break;
        default:
                return -EINVAL;
        }

        cnp = kzalloc_obj(*cnp);
        if (!cnp)
                return -ENOMEM;

        kref_init(&cnp->kref);
        spin_lock_init(&cnp->np_accept_lock);
        INIT_LIST_HEAD(&cnp->np_accept_list);
        init_waitqueue_head(&cnp->accept_wait);
        init_completion(&cnp->accept_comp);
        init_completion(&cnp->com.wr_wait.completion);

        memcpy(&np->np_sockaddr, ksockaddr,
               sizeof(struct sockaddr_storage));
        memcpy(&cnp->com.local_addr, &np->np_sockaddr,
               sizeof(cnp->com.local_addr));

        cnp->com.cdev = NULL;
        cnp->np = np;

        ret = cxgbit_inaddr_any(cnp) ? cxgbit_setup_all_np(cnp) :
                                       cxgbit_setup_cdev_np(cnp);
        if (ret) {
                /* Drops the initial reference taken by kref_init(). */
                cxgbit_put_cnp(cnp);
                return -EINVAL;
        }

        np->np_context = cnp;
        cnp->com.state = CSK_STATE_LISTEN;
        return 0;
}

/*
 * Fill the address fields of @conn: both endpoint addresses come from the
 * accepted socket @csk, the login family from the portal @np.
 */
static void
cxgbit_set_conn_info(struct iscsi_np *np, struct iscsit_conn *conn,
                     struct cxgbit_sock *csk)
{
        conn->local_sockaddr = csk->com.local_addr;
        conn->login_sockaddr = csk->com.remote_addr;
        conn->login_family = np->np_sockaddr.ss_family;
}

int cxgbit_accept_np(struct iscsi_np *np, struct iscsit_conn *conn)
{
        struct cxgbit_np *cnp = np->np_context;
        struct cxgbit_sock *csk;
        int ret = 0;

accept_wait:
        ret = wait_for_completion_interruptible(&cnp->accept_comp);
        if (ret)
                return -ENODEV;

        spin_lock_bh(&np->np_thread_lock);
        if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
                spin_unlock_bh(&np->np_thread_lock);
                /**
                 * No point in stalling here when np_thread
                 * is in state RESET/SHUTDOWN/EXIT - bail
                 **/
                return -ENODEV;
        }
        spin_unlock_bh(&np->np_thread_lock);

        spin_lock_bh(&cnp->np_accept_lock);
        if (list_empty(&cnp->np_accept_list)) {
                spin_unlock_bh(&cnp->np_accept_lock);
                goto accept_wait;
        }

        csk = list_first_entry(&cnp->np_accept_list,
                               struct cxgbit_sock,
                               accept_node);

        list_del_init(&csk->accept_node);
        spin_unlock_bh(&cnp->np_accept_lock);
        conn->context = csk;
        csk->conn = conn;

        cxgbit_set_conn_info(np, conn, csk);
        return 0;
}

/*
 * Tear down the listener of @cnp on one device.
 *
 * Removes the np hash record, asks the hardware to remove the server and
 * waits up to 10 seconds for the reply, then releases the CLIP entry
 * (IPv6 listeners bound to a single device only) and the server TID.
 *
 * Returns 0 when the teardown ran to completion, -EINVAL if @cnp has no
 * record on @cdev or the device is not up, -ETIMEDOUT if the reply did not
 * arrive, or the error of the remove-server call.
 */
static int
__cxgbit_free_cdev_np(struct cxgbit_device *cdev, struct cxgbit_np *cnp)
{
        int stid, ret;
        bool ipv6 = false;

        /* The hash record is dropped first, even if the device is down. */
        stid = cxgbit_np_hash_del(cdev, cnp);
        if (stid < 0)
                return -EINVAL;
        if (!test_bit(CDEV_STATE_UP, &cdev->flags))
                return -EINVAL;

        if (cnp->np->np_sockaddr.ss_family == AF_INET6)
                ipv6 = true;

        /*
         * Reference held across the request; dropped below only when the
         * request could not be sent (presumably the reply handler drops it
         * otherwise - confirm).
         */
        cxgbit_get_cnp(cnp);
        cxgbit_init_wr_wait(&cnp->com.wr_wait);
        ret = cxgb4_remove_server(cdev->lldi.ports[0], stid,
                                  cdev->lldi.rxq_ids[0], ipv6);

        if (ret > 0)
                ret = net_xmit_errno(ret);

        if (ret) {
                cxgbit_put_cnp(cnp);
                return ret;
        }

        ret = cxgbit_wait_for_reply(cdev, &cnp->com.wr_wait,
                                    0, 10, __func__);
        /* On timeout the stid and CLIP entry are deliberately left alone. */
        if (ret == -ETIMEDOUT)
                return ret;

        /* com.cdev is only set for listeners bound to one device. */
        if (ipv6 && cnp->com.cdev) {
                struct sockaddr_in6 *sin6;

                sin6 = (struct sockaddr_in6 *)&cnp->com.local_addr;
                cxgb4_clip_release(cdev->lldi.ports[0],
                                   (const u32 *)&sin6->sin6_addr.s6_addr,
                                   1);
        }

        cxgb4_free_stid(cdev->lldi.tids, stid,
                        cnp->com.local_addr.ss_family);
        return 0;
}

/*
 * Tear down the listener of @cnp on every registered device, under
 * cdev_list_lock. The walk stops at the first device that times out.
 */
static void cxgbit_free_all_np(struct cxgbit_np *cnp)
{
        struct cxgbit_device *cdev;

        mutex_lock(&cdev_list_lock);
        list_for_each_entry(cdev, &cdev_list_head, list) {
                if (__cxgbit_free_cdev_np(cdev, cnp) == -ETIMEDOUT)
                        break;
        }
        mutex_unlock(&cdev_list_lock);
}

/*
 * Tear down the listener of @cnp on the device recorded in cnp->com.cdev,
 * but only if that device is still on cdev_list_head. Runs under
 * cdev_list_lock.
 */
static void cxgbit_free_cdev_np(struct cxgbit_np *cnp)
{
        struct cxgbit_device *cdev;

        mutex_lock(&cdev_list_lock);
        list_for_each_entry(cdev, &cdev_list_head, list) {
                if (cdev != cnp->com.cdev)
                        continue;

                __cxgbit_free_cdev_np(cdev, cnp);
                break;
        }
        mutex_unlock(&cdev_list_lock);
}

static void __cxgbit_free_conn(struct cxgbit_sock *csk);

/*
 * Release the offload listener state attached to portal @np.
 *
 * Marks the listener dead, removes the hardware server(s), frees every
 * connection still waiting on the accept list, clears np->np_context and
 * drops a reference on the cxgbit_np.
 */
void cxgbit_free_np(struct iscsi_np *np)
{
        struct cxgbit_np *cnp = np->np_context;
        struct cxgbit_sock *csk, *next;

        cnp->com.state = CSK_STATE_DEAD;

        /* com.cdev is set only when the listener is bound to one device. */
        if (!cnp->com.cdev)
                cxgbit_free_all_np(cnp);
        else
                cxgbit_free_cdev_np(cnp);

        spin_lock_bh(&cnp->np_accept_lock);
        list_for_each_entry_safe(csk, next, &cnp->np_accept_list,
                                 accept_node) {
                list_del_init(&csk->accept_node);
                __cxgbit_free_conn(csk);
        }
        spin_unlock_bh(&cnp->np_accept_lock);

        np->np_context = NULL;
        cxgbit_put_cnp(cnp);
}

/*
 * Queue a CPL close-connection request for @csk on its tx queue and push
 * the queue. Silently does nothing if the skb cannot be allocated.
 */
static void cxgbit_send_halfclose(struct cxgbit_sock *csk)
{
        const u32 len = roundup(sizeof(struct cpl_close_con_req), 16);
        struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

        if (!skb)
                return;

        cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx,
                              NULL, NULL);
        cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL;

        __skb_queue_tail(&csk->txq, skb);
        cxgbit_push_tx_frames(csk);
}

/*
 * ARP failure handler that discards @skb and drops a reference on the
 * socket passed as @handle.
 * NOTE(review): the debug label says "cxgbit_device" although @handle is
 * used as a cxgbit_sock below.
 */
static void cxgbit_arp_failure_discard(void *handle, struct sk_buff *skb)
{
        struct cxgbit_sock *sock = handle;

        pr_debug("%s cxgbit_device %p\n", __func__, handle);

        kfree_skb(skb);
        cxgbit_put_csk(sock);
}

/*
 * ARP failure handler for abort requests: rewrite the request in @skb to
 * CPL_ABORT_NO_RST and send it through the offload queue of the device
 * passed as @handle.
 */
static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb)
{
        struct cpl_abort_req *abort_req = cplhdr(skb);
        struct cxgbit_device *dev = handle;

        pr_debug("%s cdev %p\n", __func__, dev);

        abort_req->cmd = CPL_ABORT_NO_RST;
        cxgbit_ofld_send(dev, skb);
}

/*
 * Send a CPL abort request for @csk.
 *
 * Pending tx data is dropped, a flow-control WR is sent first if no tx
 * data has been sent on this connection yet, and the abort is built in an
 * skb taken from csk->skbq.
 *
 * Returns the result of cxgbit_l2t_send().
 */
static int cxgbit_send_abort_req(struct cxgbit_sock *csk)
{
        struct sk_buff *skb;
        u32 len = roundup(sizeof(struct cpl_abort_req), 16);

        pr_debug("%s: csk %p tid %u; state %d\n",
                 __func__, csk, csk->tid, csk->com.state);

        __skb_queue_purge(&csk->txq);

        if (!test_and_set_bit(CSK_TX_DATA_SENT, &csk->com.flags))
                cxgbit_send_tx_flowc_wr(csk);

        /*
         * NOTE(review): the dequeued skb is not checked for NULL; this
         * relies on skbq having been pre-filled (see
         * cxgbit_alloc_csk_skb()) - confirm it cannot be empty here.
         */
        skb = __skb_dequeue(&csk->skbq);
        cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx,
                          csk->com.cdev, cxgbit_abort_arp_failure);

        return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t);
}

/*
 * Abort @csk; runs either directly or as a backlog callback.
 *
 * @skb is only the carrier used to queue this call and is freed at once.
 * An established connection is moved to ABORTING and an abort request is
 * sent. In any other state the waiter is woken immediately with success
 * and a socket reference is dropped.
 */
static void
__cxgbit_abort_conn(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        __kfree_skb(skb);

        if (csk->com.state == CSK_STATE_ESTABLISHED) {
                set_bit(CSK_ABORT_RPL_WAIT, &csk->com.flags);
                csk->com.state = CSK_STATE_ABORTING;

                cxgbit_send_abort_req(csk);
                return;
        }

        /* Nothing to abort: complete the wait right away. */
        cxgbit_wake_up(&csk->com.wr_wait, __func__, CPL_ERR_NONE);
        cxgbit_put_csk(csk);
}

/*
 * Abort connection @csk and wait up to 600 seconds for the abort to
 * complete.
 *
 * Takes a socket reference, then runs __cxgbit_abort_conn() directly, or
 * queues it on the backlog when the socket is currently owned.
 */
void cxgbit_abort_conn(struct cxgbit_sock *csk)
{
        /* Zero-length skb, used only to carry the backlog callback. */
        struct sk_buff *skb = alloc_skb(0, GFP_KERNEL | __GFP_NOFAIL);

        cxgbit_get_csk(csk);
        cxgbit_init_wr_wait(&csk->com.wr_wait);

        spin_lock_bh(&csk->lock);
        if (!csk->lock_owner) {
                __cxgbit_abort_conn(csk, skb);
        } else {
                cxgbit_skcb_rx_backlog_fn(skb) = __cxgbit_abort_conn;
                __skb_queue_tail(&csk->backlogq, skb);
        }
        spin_unlock_bh(&csk->lock);

        cxgbit_wait_for_reply(csk->com.cdev, &csk->com.wr_wait,
                              csk->tid, 600, __func__);
}

/*
 * Start closing @csk according to its current state, under csk->lock:
 *
 *   ESTABLISHED - half-close if the iSCSI connection is in logout,
 *                 otherwise abort
 *   CLOSING     - move to MORIBUND and half-close
 *   DEAD        - drop a socket reference (after the lock is released)
 *   other       - log an error
 */
static void __cxgbit_free_conn(struct cxgbit_sock *csk)
{
        struct iscsit_conn *conn = csk->conn;
        bool release = false;

        pr_debug("%s: state %d\n",
                 __func__, csk->com.state);

        spin_lock_bh(&csk->lock);
        if (csk->com.state == CSK_STATE_ESTABLISHED) {
                if (conn && (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)) {
                        csk->com.state = CSK_STATE_CLOSING;
                        cxgbit_send_halfclose(csk);
                } else {
                        csk->com.state = CSK_STATE_ABORTING;
                        cxgbit_send_abort_req(csk);
                }
        } else if (csk->com.state == CSK_STATE_CLOSING) {
                csk->com.state = CSK_STATE_MORIBUND;
                cxgbit_send_halfclose(csk);
        } else if (csk->com.state == CSK_STATE_DEAD) {
                release = true;
        } else {
                pr_err("%s: csk %p; state %d\n",
                       __func__, csk, csk->com.state);
        }
        spin_unlock_bh(&csk->lock);

        if (release)
                cxgbit_put_csk(csk);
}

/* Close the offloaded socket stored in conn->context. */
void cxgbit_free_conn(struct iscsit_conn *conn)
{
        struct cxgbit_sock *csk = conn->context;

        __cxgbit_free_conn(csk);
}

/*
 * Derive csk->mss and csk->emss from the MTU table entry selected by the
 * MSS index in TCP option word @opt.
 *
 * mss is the MTU minus the IP (v4 or v6, by remote address family) and TCP
 * header sizes. emss additionally loses the padded timestamp option when
 * timestamps are enabled, and is never set below 128.
 */
static void cxgbit_set_emss(struct cxgbit_sock *csk, u16 opt)
{
        size_t hdr_len = sizeof(struct tcphdr);

        if (csk->com.remote_addr.ss_family == AF_INET)
                hdr_len += sizeof(struct iphdr);
        else
                hdr_len += sizeof(struct ipv6hdr);

        csk->emss = csk->com.cdev->lldi.mtus[TCPOPT_MSS_G(opt)] - hdr_len;
        csk->mss = csk->emss;

        if (TCPOPT_TSTAMP_G(opt))
                csk->emss -= round_up(TCPOLEN_TIMESTAMP, 4);

        if (csk->emss < 128)
                csk->emss = 128;

        if (csk->emss & 7)
                pr_info("Warning: misaligned mtu idx %u mss %u emss=%u\n",
                        TCPOPT_MSS_G(opt), csk->mss, csk->emss);

        pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
                 csk->mss, csk->emss);
}

/*
 * Free every skb still held by @csk: all five socket queues, the pending
 * work-request list and the LRO header skb.
 */
static void cxgbit_free_skb(struct cxgbit_sock *csk)
{
        struct sk_buff *wr_skb;

        __skb_queue_purge(&csk->txq);
        __skb_queue_purge(&csk->rxq);
        __skb_queue_purge(&csk->backlogq);
        __skb_queue_purge(&csk->ppodq);
        __skb_queue_purge(&csk->skbq);

        for (wr_skb = cxgbit_sock_dequeue_wr(csk); wr_skb;
             wr_skb = cxgbit_sock_dequeue_wr(csk))
                kfree_skb(wr_skb);

        __kfree_skb(csk->lro_hskb);
}

/*
 * kref release callback for a cxgbit_sock.
 *
 * Releases, in order: the CLIP entry (IPv6 only), the hardware TID, the
 * route and L2T entries, the socket's slot on the device's csk list, all
 * held skbs, and the references on the listener and the device. The
 * socket itself is freed last.
 */
void _cxgbit_free_csk(struct kref *kref)
{
        struct cxgbit_sock *csk = container_of(kref, struct cxgbit_sock, kref);
        struct cxgbit_device *cdev = csk->com.cdev;

        pr_debug("%s csk %p state %d\n", __func__, csk, csk->com.state);

        if (csk->com.local_addr.ss_family == AF_INET6) {
                struct sockaddr_in6 *sin6;

                sin6 = (struct sockaddr_in6 *)&csk->com.local_addr;
                cxgb4_clip_release(cdev->lldi.ports[0],
                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
        }

        cxgb4_remove_tid(cdev->lldi.tids, 0, csk->tid,
                         csk->com.local_addr.ss_family);
        dst_release(csk->dst);
        cxgb4_l2t_release(csk->l2t);

        spin_lock_bh(&cdev->cskq.lock);
        list_del(&csk->list);
        spin_unlock_bh(&cdev->cskq.lock);

        cxgbit_free_skb(csk);
        cxgbit_put_cnp(csk->cnp);
        cxgbit_put_cdev(cdev);

        kfree(csk);
}

#define CXGBIT_10G_RCV_WIN (256 * 1024)
#define CXGBIT_10G_SND_WIN (256 * 1024)
/*
 * Size the receive and send windows of @csk from the link speed of @pi.
 *
 * Both windows start at the 10G value and are multiplied by the link
 * speed in units of SPEED_10000 (no scaling below that speed). The receive
 * window is capped at RCV_BUFSIZ_M << 10, the send window at 512 KiB.
 */
static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi)
{
        unsigned int linkspeed = pi->link_cfg.speed;
        u8 scale = linkspeed / SPEED_10000;

        csk->rcv_win = CXGBIT_10G_RCV_WIN;
        if (scale)
                csk->rcv_win *= scale;
        csk->rcv_win = min(csk->rcv_win, RCV_BUFSIZ_M << 10);

        csk->snd_win = CXGBIT_10G_SND_WIN;
        if (scale)
                csk->snd_win *= scale;
        csk->snd_win = min(csk->snd_win, 512U * 1024);

        pr_debug("%s snd_win %d rcv_win %d\n",
                 __func__, csk->snd_win, csk->rcv_win);
}

#ifdef CONFIG_CHELSIO_T4_DCB
/* Return the DCB state reported by the dcbnl getstate op of @ndev. */
static u8 cxgbit_get_iscsi_dcb_state(struct net_device *ndev)
{
        u8 state = ndev->dcbnl_ops->getstate(ndev);

        return state;
}

/*
 * Pick a priority from bitmask @pri_mask: the index of the lowest set
 * bit, or 0 when the mask is empty.
 */
static int cxgbit_select_priority(int pri_mask)
{
        return pri_mask ? ffs(pri_mask) - 1 : 0;
}

/*
 * Look up the DCB priority configured for the application identified by
 * @local_port on @ndev.
 *
 * Returns 0 if the DCBX capability query fails. With IEEE DCBX the
 * STREAM selector is tried first and the ANY selector is the fallback;
 * with CEE DCBX the port-number id type is used. The resulting mask is
 * reduced to one priority with cxgbit_select_priority().
 */
static u8 cxgbit_get_iscsi_dcb_priority(struct net_device *ndev, u16 local_port)
{
        struct dcb_app app = {
                .protocol = local_port
        };
        int mask = 0;
        int prio;
        u8 caps;

        if ((int)ndev->dcbnl_ops->getcap(ndev, DCB_CAP_ATTR_DCBX, &caps))
                return 0;

        if (caps & DCB_CAP_DCBX_VER_IEEE) {
                app.selector = IEEE_8021QAZ_APP_SEL_STREAM;
                mask = dcb_ieee_getapp_mask(ndev, &app);
                if (!mask) {
                        app.selector = IEEE_8021QAZ_APP_SEL_ANY;
                        mask = dcb_ieee_getapp_mask(ndev, &app);
                }
        } else if (caps & DCB_CAP_DCBX_VER_CEE) {
                app.selector = DCB_APP_IDTYPE_PORTNUM;
                mask = dcb_getapp(ndev, &app);
        }

        prio = cxgbit_select_priority(mask);
        pr_info("iSCSI priority is set to %u\n", prio);

        return prio;
}
#endif

/*
 * Fill in the per-connection offload parameters of @csk (L2T entry, MTU,
 * tx channel, source MAC index, queue indices, port id, TCP windows).
 *
 * @iptype:     4 for IPv4; any other value is treated as IPv6
 * @peer_ip:    peer address bytes, used for the neighbour lookup
 * @local_port: local port, used only for the DCB priority lookup
 * @dst:        route to the peer
 * @cdev:       device the connection is offloaded to
 *
 * Returns 0 on success, -ENODEV if no neighbour or usable net device is
 * found, -ENOMEM if no L2T entry can be obtained.
 */
static int
cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip,
                    u16 local_port, struct dst_entry *dst,
                    struct cxgbit_device *cdev)
{
        struct neighbour *n;
        int ret, step;
        struct net_device *ndev;
        u16 rxq_idx, port_id;
#ifdef CONFIG_CHELSIO_T4_DCB
        u8 priority = 0;
#endif

        n = dst_neigh_lookup(dst, peer_ip);
        if (!n)
                return -ENODEV;

        rcu_read_lock();
        /* Kick off neighbour resolution if the entry is not valid yet. */
        if (!(n->nud_state & NUD_VALID))
                neigh_event_send(n, NULL);

        /* Default error for the L2T allocation failures below. */
        ret = -ENOMEM;
        if (n->dev->flags & IFF_LOOPBACK) {
                /*
                 * Neighbour is on loopback: use the net device that owns
                 * the peer address instead.
                 */
                if (iptype == 4)
                        ndev = cxgbit_ipv4_netdev(*(__be32 *)peer_ip);
                else if (IS_ENABLED(CONFIG_IPV6))
                        ndev = cxgbit_ipv6_netdev((struct in6_addr *)peer_ip);
                else
                        ndev = NULL;

                if (!ndev) {
                        ret = -ENODEV;
                        goto out;
                }

                csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t,
                                         n, ndev, 0);
                if (!csk->l2t)
                        goto out;
                csk->mtu = ndev->mtu;
                csk->tx_chan = cxgb4_port_chan(ndev);
                csk->smac_idx =
                               ((struct port_info *)netdev_priv(ndev))->smt_idx;
                /* This branch always takes the first queue of the port's group. */
                step = cdev->lldi.ntxq /
                        cdev->lldi.nchan;
                csk->txq_idx = cxgb4_port_idx(ndev) * step;
                step = cdev->lldi.nrxq /
                        cdev->lldi.nchan;
                csk->ctrlq_idx = cxgb4_port_idx(ndev);
                csk->rss_qid = cdev->lldi.rxq_ids[
                                cxgb4_port_idx(ndev) * step];
                csk->port_id = cxgb4_port_idx(ndev);
                cxgbit_set_tcp_window(csk,
                                      (struct port_info *)netdev_priv(ndev));
        } else {
                ndev = cxgbit_get_real_dev(n->dev);
                if (!ndev) {
                        ret = -ENODEV;
                        goto out;
                }

#ifdef CONFIG_CHELSIO_T4_DCB
                /* Priority stays 0 unless DCB reports a non-zero state. */
                if (cxgbit_get_iscsi_dcb_state(ndev))
                        priority = cxgbit_get_iscsi_dcb_priority(ndev,
                                                                 local_port);

                csk->dcb_priority = priority;

                csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t, n, ndev, priority);
#else
                csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t, n, ndev, 0);
#endif
                if (!csk->l2t)
                        goto out;
                port_id = cxgb4_port_idx(ndev);
                csk->mtu = dst_mtu(dst);
                csk->tx_chan = cxgb4_port_chan(ndev);
                csk->smac_idx =
                               ((struct port_info *)netdev_priv(ndev))->smt_idx;
                /*
                 * Rotate through the port's tx and rx queues using the
                 * per-port selectq counters.
                 */
                step = cdev->lldi.ntxq /
                        cdev->lldi.nports;
                csk->txq_idx = (port_id * step) +
                                (cdev->selectq[port_id][0]++ % step);
                csk->ctrlq_idx = cxgb4_port_idx(ndev);
                step = cdev->lldi.nrxq /
                        cdev->lldi.nports;
                rxq_idx = (port_id * step) +
                                (cdev->selectq[port_id][1]++ % step);
                csk->rss_qid = cdev->lldi.rxq_ids[rxq_idx];
                csk->port_id = port_id;
                cxgbit_set_tcp_window(csk,
                                      (struct port_info *)netdev_priv(ndev));
        }
        ret = 0;
out:
        rcu_read_unlock();
        neigh_release(n);
        return ret;
}

/*
 * Send @skb through the offload queue of @cdev.
 *
 * The skb is always consumed: it is freed here if the device is not up or
 * the lower-level send returns a negative value.
 *
 * Returns 0 on success (positive lower-level results are folded to 0),
 * -EIO if the device is not up, or the negative lower-level error.
 */
int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        int ret;

        if (!test_bit(CDEV_STATE_UP, &cdev->flags)) {
                kfree_skb(skb);
                pr_err("%s - device not up - dropping\n", __func__);
                return -EIO;
        }

        ret = cxgb4_ofld_send(cdev->lldi.ports[0], skb);
        if (ret >= 0)
                return 0;

        kfree_skb(skb);
        return ret;
}

/*
 * Send a CPL TID-release message for @tid on @cdev. Silently does nothing
 * if the skb cannot be allocated.
 */
static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid)
{
        const u32 len = roundup(sizeof(struct cpl_tid_release), 16);
        struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

        if (!skb)
                return;

        cxgb_mk_tid_release(skb, len, tid, 0);
        cxgbit_ofld_send(cdev, skb);
}

/*
 * Send @skb on @cdev through L2T entry @l2e.
 *
 * The skb is always consumed: it is freed here if the device is not up or
 * the lower-level send returns a negative value.
 *
 * Returns 0 on success (positive lower-level results are folded to 0),
 * -EIO if the device is not up, or the negative lower-level error.
 */
int
cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb,
                struct l2t_entry *l2e)
{
        int ret;

        if (!test_bit(CDEV_STATE_UP, &cdev->flags)) {
                kfree_skb(skb);
                pr_err("%s - device not up - dropping\n", __func__);
                return -EIO;
        }

        ret = cxgb4_l2t_send(cdev->lldi.ports[0], skb, l2e);
        if (ret >= 0)
                return 0;

        kfree_skb(skb);
        return ret;
}

/*
 * Send the prepared rx-credit message @skb if @csk is still established;
 * otherwise just free it.
 */
static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        if (csk->com.state == CSK_STATE_ESTABLISHED)
                cxgbit_ofld_send(csk->com.cdev, skb);
        else
                __kfree_skb(skb);
}

/*
 * CPL connection rx data ack: host ->
 * Return the accumulated RX credits of @csk to the hardware through an
 * RX_DATA_ACK CPL message and reset the credit counter.
 *
 * The message is sent directly, or queued on the backlog when the socket
 * is currently owned.
 *
 * Returns 0 once the message has been sent or queued, -1 if the skb
 * cannot be allocated.
 */
int cxgbit_rx_data_ack(struct cxgbit_sock *csk)
{
        const u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16);
        struct sk_buff *skb;
        u32 credit_dack;

        skb = alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return -1;

        credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(3) |
                      RX_CREDITS_V(csk->rx_credits);

        cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx,
                            credit_dack);

        csk->rx_credits = 0;

        spin_lock_bh(&csk->lock);
        if (csk->lock_owner) {
                cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_send_rx_credits;
                __skb_queue_tail(&csk->backlogq, skb);
        } else {
                cxgbit_send_rx_credits(csk, skb);
        }
        spin_unlock_bh(&csk->lock);

        return 0;
}

#define FLOWC_WR_NPARAMS_MIN    9
#define FLOWC_WR_NPARAMS_MAX    11
/*
 * Pre-allocate the skbs a connection keeps in reserve:
 *  - three skbs on csk->skbq, each large enough for the biggest of an abort
 *    request, an abort reply, or a flowc WR with FLOWC_WR_NPARAMS_MAX
 *    parameters (rounded up to 16 bytes);
 *  - one zeroed skb of LRO_SKB_MIN_HEADROOM bytes stored in csk->lro_hskb.
 *
 * Returns 0 on success. On any allocation failure csk->skbq is purged and
 * -ENOMEM is returned.
 */
static int cxgbit_alloc_csk_skb(struct cxgbit_sock *csk)
{
        struct sk_buff *skb;
        u32 wr_len, flowc_len;
        u8 left;

        flowc_len = offsetof(struct fw_flowc_wr,
                             mnemval[FLOWC_WR_NPARAMS_MAX]);

        wr_len = max_t(u32, sizeof(struct cpl_abort_req),
                       sizeof(struct cpl_abort_rpl));
        wr_len = max(wr_len, flowc_len);
        wr_len = roundup(wr_len, 16);

        for (left = 3; left > 0; left--) {
                skb = alloc_skb(wr_len, GFP_ATOMIC);
                if (!skb)
                        goto out;
                __skb_queue_tail(&csk->skbq, skb);
        }

        skb = alloc_skb(LRO_SKB_MIN_HEADROOM, GFP_ATOMIC);
        if (!skb)
                goto out;

        memset(skb->data, 0, LRO_SKB_MIN_HEADROOM);
        csk->lro_hskb = skb;

        return 0;
out:
        /* Give back whatever was queued before the failure. */
        __skb_queue_purge(&csk->skbq);
        return -ENOMEM;
}

static void
cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
{
        struct sk_buff *skb;
        const struct tcphdr *tcph;
        struct cpl_t5_pass_accept_rpl *rpl5;
        struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi;
        unsigned int len = roundup(sizeof(*rpl5), 16);
        unsigned int mtu_idx;
        u64 opt0;
        u32 opt2, hlen;
        u32 wscale;
        u32 win;

        pr_debug("%s csk %p tid %u\n", __func__, csk, csk->tid);

        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb) {
                cxgbit_put_csk(csk);
                return;
        }

        rpl5 = __skb_put_zero(skb, len);

        INIT_TP_WR(rpl5, csk->tid);
        OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
                                                     csk->tid));
        cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx,
                      req->tcpopt.tstamp,
                      (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
        wscale = cxgb_compute_wscale(csk->rcv_win);
        /*
         * Specify the largest window that will fit in opt0. The
         * remainder will be specified in the rx_data_ack.
         */
        win = csk->rcv_win >> 10;
        if (win > RCV_BUFSIZ_M)
                win = RCV_BUFSIZ_M;
        opt0 =  TCAM_BYPASS_F |
                WND_SCALE_V(wscale) |
                MSS_IDX_V(mtu_idx) |
                L2T_IDX_V(csk->l2t->idx) |
                TX_CHAN_V(csk->tx_chan) |
                SMAC_SEL_V(csk->smac_idx) |
                DSCP_V(csk->tos >> 2) |
                ULP_MODE_V(ULP_MODE_ISCSI) |
                RCV_BUFSIZ_V(win);

        opt2 = RX_CHANNEL_V(0) |
                RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);

        if (!is_t5(lldi->adapter_type))
                opt2 |= RX_FC_DISABLE_F;

        if (req->tcpopt.tstamp)
                opt2 |= TSTAMPS_EN_F;
        if (req->tcpopt.sack)
                opt2 |= SACK_EN_F;
        if (wscale)
                opt2 |= WND_SCALE_EN_F;

        hlen = ntohl(req->hdr_len);

        if (is_t5(lldi->adapter_type))
                tcph = (struct tcphdr *)((u8 *)(req + 1) +
                       ETH_HDR_LEN_G(hlen) + IP_HDR_LEN_G(hlen));
        else
                tcph = (struct tcphdr *)((u8 *)(req + 1) +
                       T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));

        if (tcph->ece && tcph->cwr)
                opt2 |= CCTRL_ECN_V(1);

        opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);

        opt2 |= T5_ISS_F;
        rpl5->iss = cpu_to_be32((get_random_u32() & ~7UL) - 1);

        opt2 |= T5_OPT_2_VALID_F;

        rpl5->opt0 = cpu_to_be64(opt0);
        rpl5->opt2 = cpu_to_be32(opt2);
        set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->ctrlq_idx);
        t4_set_arp_err_handler(skb, csk, cxgbit_arp_failure_discard);
        cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t);
}

/*
 * Handle a CPL_PASS_ACCEPT_REQ: a connection request arriving on a
 * listening server TID (stid).
 *
 * Steps: look up the listening endpoint (cnp) by stid, make sure no socket
 * is already registered for the new tid, extract the 4-tuple, find a route,
 * allocate and initialise a cxgbit_sock, register it on the device list and
 * in the tid table, then send the accept reply.
 *
 * Error exits: "reject" sends a TID release before freeing the skb,
 * "rel_skb" only frees the skb. The incoming skb is freed on every path.
 */
static void
cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cxgbit_sock *csk = NULL;
        struct cxgbit_np *cnp;
        struct cpl_pass_accept_req *req = cplhdr(skb);
        unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
        struct tid_info *t = cdev->lldi.tids;
        unsigned int tid = GET_TID(req);
        u16 peer_mss = ntohs(req->tcpopt.mss);
        unsigned short hdrs;

        struct dst_entry *dst;
        __u8 local_ip[16], peer_ip[16];
        __be16 local_port, peer_port;
        int ret;
        int iptype;

        pr_debug("%s: cdev = %p; stid = %u; tid = %u\n",
                 __func__, cdev, stid, tid);

        cnp = lookup_stid(t, stid);
        if (!cnp) {
                pr_err("%s connect request on invalid stid %d\n",
                       __func__, stid);
                goto rel_skb;
        }

        if (cnp->com.state != CSK_STATE_LISTEN) {
                pr_err("%s - listening parent not in CSK_STATE_LISTEN\n",
                       __func__);
                goto reject;
        }

        /* A socket already registered for this tid is unexpected. */
        csk = lookup_tid(t, tid);
        if (csk) {
                pr_err("%s csk not null tid %u\n",
                       __func__, tid);
                goto rel_skb;
        }

        cxgb_get_4tuple(req, cdev->lldi.adapter_type, &iptype, local_ip,
                        peer_ip, &local_port, &peer_port);

        /* Find output route */
        if (iptype == 4)  {
                pr_debug("%s parent sock %p tid %u laddr %pI4 raddr %pI4 "
                         "lport %d rport %d peer_mss %d\n"
                         , __func__, cnp, tid,
                         local_ip, peer_ip, ntohs(local_port),
                         ntohs(peer_port), peer_mss);
                dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev,
                                      *(__be32 *)local_ip,
                                      *(__be32 *)peer_ip,
                                      local_port, peer_port,
                                      PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
        } else {
                pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 "
                         "lport %d rport %d peer_mss %d\n"
                         , __func__, cnp, tid,
                         local_ip, peer_ip, ntohs(local_port),
                         ntohs(peer_port), peer_mss);
                dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev,
                                       local_ip, peer_ip,
                                       local_port, peer_port,
                                       PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
                                       ((struct sockaddr_in6 *)
                                        &cnp->com.local_addr)->sin6_scope_id);
        }
        if (!dst) {
                pr_err("%s - failed to find dst entry!\n",
                       __func__);
                goto reject;
        }

        /*
         * NOTE(review): on allocation failure this exits via rel_skb, so no
         * TID release is sent for the new tid - confirm this is intended.
         */
        csk = kzalloc_obj(*csk, GFP_ATOMIC);
        if (!csk) {
                dst_release(dst);
                goto rel_skb;
        }

        ret = cxgbit_offload_init(csk, iptype, peer_ip, ntohs(local_port),
                                  dst, cdev);
        if (ret) {
                pr_err("%s - failed to allocate l2t entry!\n",
                       __func__);
                dst_release(dst);
                kfree(csk);
                goto reject;
        }

        kref_init(&csk->kref);
        init_completion(&csk->com.wr_wait.completion);

        INIT_LIST_HEAD(&csk->accept_node);

        /*
         * Clamp the MTU to what the peer's advertised MSS allows: MSS plus
         * IP and TCP headers, plus 12 bytes when timestamps are in use.
         */
        hdrs = (iptype == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
                sizeof(struct tcphdr) + (req->tcpopt.tstamp ? 12 : 0);
        if (peer_mss && csk->mtu > (peer_mss + hdrs))
                csk->mtu = peer_mss + hdrs;

        csk->com.state = CSK_STATE_CONNECTING;
        csk->com.cdev = cdev;
        csk->cnp = cnp;
        csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
        csk->dst = dst;
        csk->tid = tid;
        /*
         * The credit budget is the adapter's WR credits minus the 16-byte
         * units needed for one cpl_abort_req.
         */
        csk->wr_cred = cdev->lldi.wr_cred -
                        DIV_ROUND_UP(sizeof(struct cpl_abort_req), 16);
        csk->wr_max_cred = csk->wr_cred;
        csk->wr_una_cred = 0;

        /* Record local and remote addresses from the extracted 4-tuple. */
        if (iptype == 4) {
                struct sockaddr_in *sin = (struct sockaddr_in *)
                                          &csk->com.local_addr;
                sin->sin_family = AF_INET;
                sin->sin_port = local_port;
                sin->sin_addr.s_addr = *(__be32 *)local_ip;

                sin = (struct sockaddr_in *)&csk->com.remote_addr;
                sin->sin_family = AF_INET;
                sin->sin_port = peer_port;
                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
        } else {
                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
                                            &csk->com.local_addr;

                sin6->sin6_family = PF_INET6;
                sin6->sin6_port = local_port;
                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
                /* The return value of cxgb4_clip_get() is not checked. */
                cxgb4_clip_get(cdev->lldi.ports[0],
                               (const u32 *)&sin6->sin6_addr.s6_addr,
                               1);

                sin6 = (struct sockaddr_in6 *)&csk->com.remote_addr;
                sin6->sin6_family = PF_INET6;
                sin6->sin6_port = peer_port;
                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
        }

        skb_queue_head_init(&csk->rxq);
        skb_queue_head_init(&csk->txq);
        skb_queue_head_init(&csk->ppodq);
        skb_queue_head_init(&csk->backlogq);
        skb_queue_head_init(&csk->skbq);
        cxgbit_sock_reset_wr_list(csk);
        spin_lock_init(&csk->lock);
        init_waitqueue_head(&csk->waitq);
        csk->lock_owner = false;

        /*
         * NOTE(review): this failure path frees only dst and csk. It does
         * not undo the cxgb4_clip_get() above (IPv6), does not release
         * whatever cxgbit_offload_init() acquired (presumably the L2T
         * entry - verify), and exits via rel_skb so no TID release is sent.
         * Confirm whether these are leaks.
         */
        if (cxgbit_alloc_csk_skb(csk)) {
                dst_release(dst);
                kfree(csk);
                goto rel_skb;
        }

        /* The new socket holds references on its listener and device. */
        cxgbit_get_cnp(cnp);
        cxgbit_get_cdev(cdev);

        spin_lock(&cdev->cskq.lock);
        list_add_tail(&csk->list, &cdev->cskq.list);
        spin_unlock(&cdev->cskq.lock);
        cxgb4_insert_tid(t, csk, tid, csk->com.local_addr.ss_family);
        cxgbit_pass_accept_rpl(csk, req);
        goto rel_skb;

reject:
        cxgbit_release_tid(cdev, tid);
rel_skb:
        __kfree_skb(skb);
}

/*
 * Work out the size of the flowc work request for @csk.
 *
 * The parameter count starts at FLOWC_WR_NPARAMS_MIN, grows by one when
 * csk->snd_wscale is non-zero, and by one more when CONFIG_CHELSIO_T4_DCB
 * is enabled.
 *
 * Returns the number of 16-byte credits used by the flowc request. When the
 * pointers are non-NULL, *nparamsp receives the parameter count and
 * *flowclenp the length in bytes rounded up to a multiple of 16.
 */
static u32
cxgbit_tx_flowc_wr_credits(struct cxgbit_sock *csk, u32 *nparamsp,
                           u32 *flowclenp)
{
        u32 count = FLOWC_WR_NPARAMS_MIN;
        u32 raw_len, len16;

        if (csk->snd_wscale)
                count++;

#ifdef CONFIG_CHELSIO_T4_DCB
        count++;
#endif
        raw_len = offsetof(struct fw_flowc_wr, mnemval[count]);
        len16 = DIV_ROUND_UP(raw_len, 16);

        if (nparamsp)
                *nparamsp = count;
        if (flowclenp)
                *flowclenp = len16 * 16;

        return len16;
}

u32 cxgbit_send_tx_flowc_wr(struct cxgbit_sock *csk)
{
        struct cxgbit_device *cdev = csk->com.cdev;
        struct fw_flowc_wr *flowc;
        u32 nparams, flowclen16, flowclen;
        struct sk_buff *skb;
        u8 index;

#ifdef CONFIG_CHELSIO_T4_DCB
        u16 vlan = ((struct l2t_entry *)csk->l2t)->vlan;
#endif

        flowclen16 = cxgbit_tx_flowc_wr_credits(csk, &nparams, &flowclen);

        skb = __skb_dequeue(&csk->skbq);
        flowc = __skb_put_zero(skb, flowclen);

        flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
                                           FW_FLOWC_WR_NPARAMS_V(nparams));
        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
                                          FW_WR_FLOWID_V(csk->tid));
        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
        flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
                                            (csk->com.cdev->lldi.pf));
        flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
        flowc->mnemval[1].val = cpu_to_be32(csk->tx_chan);
        flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
        flowc->mnemval[2].val = cpu_to_be32(csk->tx_chan);
        flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
        flowc->mnemval[3].val = cpu_to_be32(csk->rss_qid);
        flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
        flowc->mnemval[4].val = cpu_to_be32(csk->snd_nxt);
        flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
        flowc->mnemval[5].val = cpu_to_be32(csk->rcv_nxt);
        flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
        flowc->mnemval[6].val = cpu_to_be32(csk->snd_win);
        flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
        flowc->mnemval[7].val = cpu_to_be32(csk->emss);

        flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
        if (test_bit(CDEV_ISO_ENABLE, &cdev->flags))
                flowc->mnemval[8].val = cpu_to_be32(CXGBIT_MAX_ISO_PAYLOAD);
        else
                flowc->mnemval[8].val = cpu_to_be32(16384);

        index = 9;

        if (csk->snd_wscale) {
                flowc->mnemval[index].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
                flowc->mnemval[index].val = cpu_to_be32(csk->snd_wscale);
                index++;
        }

#ifdef CONFIG_CHELSIO_T4_DCB
        flowc->mnemval[index].mnemonic = FW_FLOWC_MNEM_DCBPRIO;
        if (vlan == VLAN_NONE) {
                pr_warn("csk %u without VLAN Tag on DCB Link\n", csk->tid);
                flowc->mnemval[index].val = cpu_to_be32(0);
        } else
                flowc->mnemval[index].val = cpu_to_be32(
                                (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT);
#endif

        pr_debug("%s: csk %p; tx_chan = %u; rss_qid = %u; snd_seq = %u;"
                 " rcv_seq = %u; snd_win = %u; emss = %u\n",
                 __func__, csk, csk->tx_chan, csk->rss_qid, csk->snd_nxt,
                 csk->rcv_nxt, csk->snd_win, csk->emss);
        set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
        cxgbit_ofld_send(csk->com.cdev, skb);
        return flowclen16;
}

/*
 * Send a TCB-field request skb for @csk while holding csk->lock.
 *
 * On an established connection a socket reference is taken, the wr_wait
 * object is re-armed and the skb is sent; returns 0.
 * In any other state the skb is freed and -1 is returned.
 */
static int
cxgbit_send_tcb_skb(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        spin_lock_bh(&csk->lock);
        if (unlikely(csk->com.state != CSK_STATE_ESTABLISHED))
                goto bad_state;

        cxgbit_get_csk(csk);
        cxgbit_init_wr_wait(&csk->com.wr_wait);
        cxgbit_ofld_send(csk->com.cdev, skb);
        spin_unlock_bh(&csk->lock);

        return 0;

bad_state:
        spin_unlock_bh(&csk->lock);
        pr_err("%s: csk 0x%p, tid %u, state %u\n",
               __func__, csk, csk->tid, csk->com.state);
        __kfree_skb(skb);
        return -1;
}

/*
 * Program the connection's ULP CRC submode with a CPL_SET_TCB_FIELD request
 * and wait up to 5 seconds for the reply.
 *
 * The header and data CRC bits are derived from csk->submode and written
 * into the two-bit field selected by mask 0x3 << 4.
 *
 * Returns 0 on success, -ENOMEM if the skb cannot be allocated, and -1 if
 * the request cannot be sent or the wait reports an error.
 */
int cxgbit_setup_conn_digest(struct cxgbit_sock *csk)
{
        struct cpl_set_tcb_field *req;
        struct sk_buff *skb;
        unsigned int len = roundup(sizeof(*req), 16);
        u8 hcrc = csk->submode & CXGBIT_SUBMODE_HCRC;
        u8 dcrc = csk->submode & CXGBIT_SUBMODE_DCRC;

        skb = alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return -ENOMEM;

        /*  set up ulp submode */
        req = __skb_put_zero(skb, len);

        INIT_TP_WR(req, csk->tid);
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid));
        /* NO_REPLY is 0: a reply is requested on the socket's RSS queue. */
        req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid));
        req->word_cookie = htons(0);
        req->mask = cpu_to_be64(0x3 << 4);
        req->val = cpu_to_be64(((hcrc ? ULP_CRC_HEADER : 0) |
                                (dcrc ? ULP_CRC_DATA : 0)) << 4);
        set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->ctrlq_idx);

        if (cxgbit_send_tcb_skb(csk, skb))
                return -1;

        if (cxgbit_wait_for_reply(csk->com.cdev,
                                  &csk->com.wr_wait,
                                  csk->tid, 5, __func__))
                return -1;

        return 0;
}

/*
 * Program the connection's page-size index with a CPL_SET_TCB_FIELD request
 * and wait up to 5 seconds for the reply.
 *
 * @pg_idx is written into the two-bit field selected by mask 0x3 << 8.
 *
 * Returns 0 on success, -ENOMEM if the skb cannot be allocated, and -1 if
 * the request cannot be sent or the wait reports an error.
 */
int cxgbit_setup_conn_pgidx(struct cxgbit_sock *csk, u32 pg_idx)
{
        struct cpl_set_tcb_field *req;
        struct sk_buff *skb;
        unsigned int len = roundup(sizeof(*req), 16);

        skb = alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return -ENOMEM;

        req = __skb_put_zero(skb, len);

        INIT_TP_WR(req, csk->tid);
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid));
        /* NO_REPLY is 0: a reply is requested on the socket's RSS queue. */
        req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid));
        req->word_cookie = htons(0);
        req->mask = cpu_to_be64(0x3 << 8);
        req->val = cpu_to_be64(pg_idx << 8);
        set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->ctrlq_idx);

        if (cxgbit_send_tcb_skb(csk, skb))
                return -1;

        if (cxgbit_wait_for_reply(csk->com.cdev,
                                  &csk->com.wr_wait,
                                  csk->tid, 5, __func__))
                return -1;

        return 0;
}

/*
 * Handle CPL_PASS_OPEN_RPL: wake the waiter on the listening endpoint that
 * owns the stid, passing along the reply status, then drop one reference on
 * that endpoint. The skb is always freed.
 */
static void
cxgbit_pass_open_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
        unsigned int stid = GET_TID(rpl);
        struct cxgbit_np *cnp;

        cnp = lookup_stid(cdev->lldi.tids, stid);

        pr_debug("%s: cnp = %p; stid = %u; status = %d\n",
                 __func__, cnp, stid, rpl->status);

        if (cnp) {
                cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status);
                cxgbit_put_cnp(cnp);
        } else {
                pr_info("%s stid %d lookup failure\n", __func__, stid);
        }

        __kfree_skb(skb);
}

/*
 * Handle CPL_CLOSE_LISTSRV_RPL: wake the waiter on the listening endpoint
 * that owns the stid, passing along the reply status, then drop one
 * reference on that endpoint. The skb is always freed.
 */
static void
cxgbit_close_listsrv_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
        unsigned int stid = GET_TID(rpl);
        struct cxgbit_np *cnp;

        cnp = lookup_stid(cdev->lldi.tids, stid);

        pr_debug("%s: cnp = %p; stid = %u; status = %d\n",
                 __func__, cnp, stid, rpl->status);

        if (cnp) {
                cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status);
                cxgbit_put_cnp(cnp);
        } else {
                pr_info("%s stid %d lookup failure\n", __func__, stid);
        }

        __kfree_skb(skb);
}

/*
 * Handle CPL_PASS_ESTABLISH for a previously accepted tid.
 *
 * Seeds the socket's send/receive sequence state from the message, records
 * the send window scale and effective MSS, marks the socket ESTABLISHED,
 * queues it on the listener's accept list and signals accept_comp.
 * The skb is always freed.
 */
static void
cxgbit_pass_establish(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cpl_pass_establish *req = cplhdr(skb);
        unsigned int tid = GET_TID(req);
        u16 tcp_opt = be16_to_cpu(req->tcp_opt);
        u32 snd_isn = be32_to_cpu(req->snd_isn);
        u32 rcv_isn = be32_to_cpu(req->rcv_isn);
        struct cxgbit_sock *csk;
        struct cxgbit_np *cnp;

        csk = lookup_tid(cdev->lldi.tids, tid);
        if (unlikely(!csk)) {
                pr_err("can't find connection for tid %u.\n", tid);
                goto rel_skb;
        }
        cnp = csk->cnp;

        pr_debug("%s: csk %p; tid %u; cnp %p\n",
                 __func__, csk, tid, cnp);

        /* All send-side sequence counters start at the initial send number. */
        csk->write_seq = snd_isn;
        csk->snd_una = snd_isn;
        csk->snd_nxt = snd_isn;

        csk->rcv_nxt = rcv_isn;

        csk->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
        cxgbit_set_emss(csk, tcp_opt);
        dst_confirm(csk->dst);
        csk->com.state = CSK_STATE_ESTABLISHED;

        /* Publish the socket to the listener and signal accept_comp. */
        spin_lock_bh(&cnp->np_accept_lock);
        list_add_tail(&csk->accept_node, &cnp->np_accept_list);
        spin_unlock_bh(&cnp->np_accept_lock);
        complete(&cnp->accept_comp);

rel_skb:
        __kfree_skb(skb);
}

/*
 * Append @skb to the socket's receive queue and wake anyone sleeping on
 * csk->waitq. The skb's cxgbit control-block flags are cleared first.
 */
static void cxgbit_queue_rx_skb(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        cxgbit_skcb_flags(skb) = 0;
        /* The queue's own lock guards the unlocked __skb_queue_tail(). */
        spin_lock_bh(&csk->rxq.lock);
        __skb_queue_tail(&csk->rxq, skb);
        spin_unlock_bh(&csk->rxq.lock);
        wake_up(&csk->waitq);
}

/*
 * Handle CPL_PEER_CLOSE according to the current socket state:
 *   ESTABLISHED -> CLOSING, and the skb is queued on the rx queue instead
 *                  of being freed;
 *   CLOSING     -> MORIBUND (simultaneous close);
 *   MORIBUND    -> DEAD, dropping one socket reference;
 *   ABORTING    -> ignored;
 *   anything else is logged.
 * Except in the ESTABLISHED case the skb is freed here.
 */
static void cxgbit_peer_close(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        int state;

        pr_debug("%s: csk %p; tid %u; state %d\n",
                 __func__, csk, csk->tid, csk->com.state);

        /* Decide on a single snapshot of the state. */
        state = csk->com.state;

        if (state == CSK_STATE_ESTABLISHED) {
                csk->com.state = CSK_STATE_CLOSING;
                cxgbit_queue_rx_skb(csk, skb);
                return;
        }

        if (state == CSK_STATE_CLOSING) {
                /* simultaneous close */
                csk->com.state = CSK_STATE_MORIBUND;
        } else if (state == CSK_STATE_MORIBUND) {
                csk->com.state = CSK_STATE_DEAD;
                cxgbit_put_csk(csk);
        } else if (state != CSK_STATE_ABORTING) {
                pr_info("%s: cpl_peer_close in bad state %d\n",
                        __func__, csk->com.state);
        }

        __kfree_skb(skb);
}

/*
 * Handle CPL_CLOSE_CON_RPL according to the current socket state:
 *   CLOSING  -> MORIBUND;
 *   MORIBUND -> DEAD, dropping one socket reference;
 *   ABORTING and DEAD are ignored;
 *   anything else is logged.
 * The skb is always freed.
 */
static void cxgbit_close_con_rpl(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        pr_debug("%s: csk %p; tid %u; state %d\n",
                 __func__, csk, csk->tid, csk->com.state);

        switch (csk->com.state) {
        case CSK_STATE_ABORTING:
        case CSK_STATE_DEAD:
                /* Nothing left to do for these states. */
                break;
        case CSK_STATE_MORIBUND:
                csk->com.state = CSK_STATE_DEAD;
                cxgbit_put_csk(csk);
                break;
        case CSK_STATE_CLOSING:
                csk->com.state = CSK_STATE_MORIBUND;
                break;
        default:
                pr_info("%s: cpl_close_con_rpl in bad state %d\n",
                        __func__, csk->com.state);
        }

        __kfree_skb(skb);
}

/*
 * Handle CPL_ABORT_REQ_RSS (abort requested by the adapter side).
 *
 * Negative-advice statuses are logged and ignored. Otherwise the socket is
 * moved to DEAD (unless it is already ABORTING), the tx queue is purged, a
 * flowc WR is sent first if CSK_TX_DATA_SENT was not yet set, and an abort
 * reply is sent using an skb taken from csk->skbq.
 *
 * What happens to the incoming skb and the socket reference depends on the
 * state the socket was in:
 *   ESTABLISHED          - the skb is queued on the rx queue, not freed;
 *   CONNECTING, MORIBUND - one socket reference is dropped;
 *   CLOSING              - a reference is dropped only if csk->conn is NULL.
 */
static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        struct cpl_abort_req_rss *hdr = cplhdr(skb);
        u32 rpl_len = roundup(sizeof(struct cpl_abort_rpl), 16);
        unsigned int tid = GET_TID(hdr);
        struct sk_buff *rpl_skb;
        bool wakeup_thread = false;
        bool release = false;

        pr_debug("%s: csk %p; tid %u; state %d\n",
                 __func__, csk, tid, csk->com.state);

        if (cxgb_is_neg_adv(hdr->status)) {
                pr_err("%s: got neg advise %d on tid %u\n",
                       __func__, hdr->status, tid);
                goto rel_skb;
        }

        switch (csk->com.state) {
        case CSK_STATE_ABORTING:
                break;
        case CSK_STATE_ESTABLISHED:
                csk->com.state = CSK_STATE_DEAD;
                wakeup_thread = true;
                break;
        case CSK_STATE_CLOSING:
                csk->com.state = CSK_STATE_DEAD;
                release = !csk->conn;
                break;
        case CSK_STATE_CONNECTING:
        case CSK_STATE_MORIBUND:
                csk->com.state = CSK_STATE_DEAD;
                release = true;
                break;
        default:
                pr_info("%s: cpl_abort_req_rss in bad state %d\n",
                        __func__, csk->com.state);
                csk->com.state = CSK_STATE_DEAD;
        }

        __skb_queue_purge(&csk->txq);

        /* The flowc WR goes out before the abort reply if not sent yet. */
        if (!test_and_set_bit(CSK_TX_DATA_SENT, &csk->com.flags))
                cxgbit_send_tx_flowc_wr(csk);

        rpl_skb = __skb_dequeue(&csk->skbq);

        cxgb_mk_abort_rpl(rpl_skb, rpl_len, csk->tid, csk->txq_idx);
        cxgbit_ofld_send(csk->com.cdev, rpl_skb);

        if (wakeup_thread) {
                /* Ownership of the skb passes to the rx queue. */
                cxgbit_queue_rx_skb(csk, skb);
                return;
        }

        if (release)
                cxgbit_put_csk(csk);
rel_skb:
        __kfree_skb(skb);
}

/*
 * Handle CPL_ABORT_RPL_RSS (reply to an abort this side requested).
 *
 * Only meaningful in ABORTING: the socket becomes DEAD, a waiter is woken
 * with the reply status if CSK_ABORT_RPL_WAIT is set, and one socket
 * reference is dropped. Any other state is just logged.
 * The skb is always freed.
 */
static void cxgbit_abort_rpl_rss(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        struct cpl_abort_rpl_rss *rpl = cplhdr(skb);

        pr_debug("%s: csk %p; tid %u; state %d\n",
                 __func__, csk, csk->tid, csk->com.state);

        if (csk->com.state != CSK_STATE_ABORTING) {
                pr_info("%s: cpl_abort_rpl_rss in state %d\n",
                        __func__, csk->com.state);
        } else {
                csk->com.state = CSK_STATE_DEAD;
                if (test_bit(CSK_ABORT_RPL_WAIT, &csk->com.flags))
                        cxgbit_wake_up(&csk->com.wr_wait, __func__,
                                       rpl->status);
                cxgbit_put_csk(csk);
        }

        __kfree_skb(skb);
}

/*
 * Sanity-check the socket's work-request credit accounting.
 *
 * Returns true (after logging) when the available credits exceed the
 * maximum, or when available credits plus the credits recorded in the csum
 * field of every skb on the pending WR list do not add up to the maximum.
 * Returns false when the books balance.
 */
static bool cxgbit_credit_err(const struct cxgbit_sock *csk)
{
        const struct sk_buff *skb;
        u32 pending = 0;

        if (unlikely(csk->wr_cred > csk->wr_max_cred)) {
                pr_err("csk 0x%p, tid %u, credit %u > %u\n",
                       csk, csk->tid, csk->wr_cred, csk->wr_max_cred);
                return true;
        }

        /* Sum the credits held by work requests that are still pending. */
        for (skb = csk->wr_pending_head; skb;
             skb = cxgbit_skcb_tx_wr_next(skb))
                pending += (__force u32)skb->csum;

        if (unlikely((csk->wr_cred + pending) != csk->wr_max_cred)) {
                pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n",
                       csk, csk->tid, csk->wr_cred,
                       pending, csk->wr_max_cred);

                return true;
        }

        return false;
}

/*
 * Handle CPL_FW4_ACK: the adapter returns work-request credits and,
 * optionally, a new snd_una.
 *
 * The returned credits are added to csk->wr_cred and wr_una_cred is clamped
 * to the credits still outstanding. The pending WR list is then walked from
 * its head: each skb whose credit count (kept in skb->csum) is fully covered
 * is dequeued and freed; if the remaining credits cover only part of the
 * head skb, its count is reduced and the walk stops.
 *
 * If the credit books no longer balance (cxgbit_credit_err()), the CPL skb
 * is queued on the rx queue instead of being freed - presumably so the rx
 * thread can react to the error; confirm against the rx path.
 *
 * Otherwise, when the message carries a valid sequence number, snd_una is
 * advanced (a value older than the current one is logged and ignored), and
 * queued tx data is pushed if any is waiting.
 */
static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        struct cpl_fw4_ack *rpl = (struct cpl_fw4_ack *)cplhdr(skb);
        u32 credits = rpl->credits;
        u32 snd_una = ntohl(rpl->snd_una);

        csk->wr_cred += credits;
        if (csk->wr_una_cred > (csk->wr_max_cred - csk->wr_cred))
                csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred;

        while (credits) {
                struct sk_buff *p = cxgbit_sock_peek_wr(csk);
                u32 csum;

                if (unlikely(!p)) {
                        pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n",
                               csk, csk->tid, credits,
                               csk->wr_cred, csk->wr_una_cred);
                        break;
                }

                /* skb->csum is reused to hold the WR's credit count. */
                csum = (__force u32)p->csum;
                if (unlikely(credits < csum)) {
                        /* Partial ack: leave the WR queued with fewer credits. */
                        pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n",
                                csk,  csk->tid,
                                credits, csk->wr_cred, csk->wr_una_cred,
                                csum);
                        p->csum = (__force __wsum)(csum - credits);
                        break;
                }

                cxgbit_sock_dequeue_wr(csk);
                credits -= csum;
                kfree_skb(p);
        }

        if (unlikely(cxgbit_credit_err(csk))) {
                cxgbit_queue_rx_skb(csk, skb);
                return;
        }

        if (rpl->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
                if (unlikely(before(snd_una, csk->snd_una))) {
                        /* Terminate the record like every other message here. */
                        pr_warn("csk 0x%p,%u, snd_una %u/%u.\n",
                                csk, csk->tid, snd_una,
                                csk->snd_una);
                        goto rel_skb;
                }

                if (csk->snd_una != snd_una) {
                        csk->snd_una = snd_una;
                        dst_confirm(csk->dst);
                }
        }

        if (skb_queue_len(&csk->txq))
                cxgbit_push_tx_frames(csk);

rel_skb:
        __kfree_skb(skb);
}

/*
 * Handle CPL_SET_TCB_RPL: wake the waiter on the socket that owns the tid
 * with the reply status, then drop one socket reference. If no socket is
 * registered for the tid the event is only logged.
 * The skb is always freed.
 */
static void cxgbit_set_tcb_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cpl_set_tcb_rpl *rpl = (struct cpl_set_tcb_rpl *)skb->data;
        unsigned int tid = GET_TID(rpl);
        struct cxgbit_sock *csk;

        csk = lookup_tid(cdev->lldi.tids, tid);
        if (unlikely(!csk)) {
                pr_err("can't find connection for tid %u.\n", tid);
                goto rel_skb;
        }

        cxgbit_wake_up(&csk->com.wr_wait, __func__, rpl->status);
        cxgbit_put_csk(csk);
rel_skb:
        __kfree_skb(skb);
}

/*
 * Handle CPL_RX_DATA: queue the skb on the receive queue of the socket that
 * owns the tid. If no socket is registered for the tid the skb is freed.
 */
static void cxgbit_rx_data(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cpl_rx_data *cpl = cplhdr(skb);
        unsigned int tid = GET_TID(cpl);
        struct cxgbit_sock *csk;

        csk = lookup_tid(cdev->lldi.tids, tid);
        if (unlikely(!csk)) {
                pr_err("can't find conn. for tid %u.\n", tid);
                __kfree_skb(skb);
                return;
        }

        cxgbit_queue_rx_skb(csk, skb);
}

/*
 * Run the handler stored in the skb's control block, under csk->lock.
 * While csk->lock_owner is set the skb is appended to the backlog queue
 * instead of being handled here.
 */
static void
__cxgbit_process_rx_cpl(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        spin_lock(&csk->lock);
        if (!csk->lock_owner)
                cxgbit_skcb_rx_backlog_fn(skb)(csk, skb);
        else
                __skb_queue_tail(&csk->backlogq, skb);
        spin_unlock(&csk->lock);
}

/*
 * Same as __cxgbit_process_rx_cpl(), but holds an extra socket reference
 * across the call. Several of the handlers dispatched this way call
 * cxgbit_put_csk() themselves.
 */
static void cxgbit_process_rx_cpl(struct cxgbit_sock *csk, struct sk_buff *skb)
{
        cxgbit_get_csk(csk);
        __cxgbit_process_rx_cpl(csk, skb);
        cxgbit_put_csk(csk);
}

static void cxgbit_rx_cpl(struct cxgbit_device *cdev, struct sk_buff *skb)
{
        struct cxgbit_sock *csk;
        struct cpl_tx_data *cpl = cplhdr(skb);
        struct cxgb4_lld_info *lldi = &cdev->lldi;
        struct tid_info *t = lldi->tids;
        unsigned int tid = GET_TID(cpl);
        u8 opcode = cxgbit_skcb_rx_opcode(skb);
        bool ref = true;

        switch (opcode) {
        case CPL_FW4_ACK:
                        cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_fw4_ack;
                        ref = false;
                        break;
        case CPL_PEER_CLOSE:
                        cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_peer_close;
                        break;
        case CPL_CLOSE_CON_RPL:
                        cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_close_con_rpl;
                        break;
        case CPL_ABORT_REQ_RSS:
                        cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_abort_req_rss;
                        break;
        case CPL_ABORT_RPL_RSS:
                        cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_abort_rpl_rss;
                        break;
        default:
                goto rel_skb;
        }

        csk = lookup_tid(t, tid);
        if (unlikely(!csk)) {
                pr_err("can't find conn. for tid %u.\n", tid);
                goto rel_skb;
        }

        if (ref)
                cxgbit_process_rx_cpl(csk, skb);
        else
                __cxgbit_process_rx_cpl(csk, skb);

        return;
rel_skb:
        __kfree_skb(skb);
}

/*
 * CPL opcode -> handler dispatch table, indexed by CPL opcode.
 *
 * Opcodes without an entry are left NULL by the designated initializers.
 * The five per-connection opcodes at the bottom share cxgbit_rx_cpl(),
 * which picks the per-opcode handler itself; the others have dedicated
 * device-level handlers.
 */
cxgbit_cplhandler_func cxgbit_cplhandlers[NUM_CPL_CMDS] = {
        [CPL_PASS_OPEN_RPL]     = cxgbit_pass_open_rpl,
        [CPL_CLOSE_LISTSRV_RPL] = cxgbit_close_listsrv_rpl,
        [CPL_PASS_ACCEPT_REQ]   = cxgbit_pass_accept_req,
        [CPL_PASS_ESTABLISH]    = cxgbit_pass_establish,
        [CPL_SET_TCB_RPL]       = cxgbit_set_tcb_rpl,
        [CPL_RX_DATA]           = cxgbit_rx_data,
        [CPL_FW4_ACK]           = cxgbit_rx_cpl,
        [CPL_PEER_CLOSE]        = cxgbit_rx_cpl,
        [CPL_CLOSE_CON_RPL]     = cxgbit_rx_cpl,
        [CPL_ABORT_REQ_RSS]     = cxgbit_rx_cpl,
        [CPL_ABORT_RPL_RSS]     = cxgbit_rx_cpl,
};