root/drivers/net/ethernet/sfc/mcdi_filters.c
// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2018 Solarflare Communications Inc.
 * Copyright 2019-2020 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "mcdi_filters.h"
#include "mcdi.h"
#include "nic.h"
#include "rx_common.h"

/* The maximum size of a shared RSS context */
/* TODO: this should really be from the mcdi protocol export */
#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL

/*
 * Sentinel filter ID meaning "no filter"; efx_mcdi_filter_remove_unsafe()
 * silently ignores it.
 */
#define EFX_EF10_FILTER_ID_INVALID 0xffff

/*
 * An arbitrary search limit for the software hash table.  Insertion probes
 * depths 1 .. limit-1 from the hashed position and fails with -EBUSY if no
 * usable slot is found within that range.
 */
#define EFX_EF10_FILTER_SEARCH_LIMIT 200

/*
 * Return the filter spec pointer stored in table row @filter_idx, with the
 * private flag bits that share the same word masked off.
 */
static struct efx_filter_spec *
efx_mcdi_filter_entry_spec(const struct efx_mcdi_filter_table *table,
                           unsigned int filter_idx)
{
        unsigned long packed = table->entry[filter_idx].spec;

        packed &= ~EFX_EF10_FILTER_FLAGS;
        return (struct efx_filter_spec *)packed;
}

/*
 * Return only the private flag bits stored alongside the spec pointer in
 * table row @filter_idx.
 */
static unsigned int
efx_mcdi_filter_entry_flags(const struct efx_mcdi_filter_table *table,
                           unsigned int filter_idx)
{
        unsigned long packed = table->entry[filter_idx].spec;

        return packed & EFX_EF10_FILTER_FLAGS;
}

/*
 * Extract the table row index from a filter ID.  Warns (once) if handed the
 * invalid-ID sentinel, but still returns the masked value.
 */
static u32 efx_mcdi_filter_get_unsafe_id(u32 filter_id)
{
        const u32 row_mask = EFX_MCDI_FILTER_TBL_ROWS - 1;

        WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID);
        return filter_id & row_mask;
}

/*
 * Extract the match-priority part of a filter ID; the inverse of the
 * priority term in efx_mcdi_filter_make_filter_id().
 */
static unsigned int efx_mcdi_filter_get_unsafe_pri(u32 filter_id)
{
        const u32 ids_per_pri = EFX_MCDI_FILTER_TBL_ROWS * 2;

        return filter_id / ids_per_pri;
}

/*
 * Compose a filter ID from a match priority and a table row index.  Each
 * priority occupies a range of 2 * EFX_MCDI_FILTER_TBL_ROWS IDs.
 */
static u32 efx_mcdi_filter_make_filter_id(unsigned int pri, u16 idx)
{
        u32 base = pri * EFX_MCDI_FILTER_TBL_ROWS * 2;

        return base + idx;
}

/*
 * Tell whether a filter claims its traffic exclusively or merely
 * subscribes to it alongside other recipients.
 *
 * A filter is exclusive when it matches a non-multicast local MAC
 * address, or when it matches both EtherType and local host and that
 * host is a non-multicast IPv4 address or an IPv6 address whose first
 * byte is not 0xff.
 */
static bool efx_mcdi_filter_is_exclusive(const struct efx_filter_spec *spec)
{
        const unsigned int ip_match = EFX_FILTER_MATCH_ETHER_TYPE |
                                      EFX_FILTER_MATCH_LOC_HOST;

        if ((spec->match_flags & EFX_FILTER_MATCH_LOC_MAC) &&
            !is_multicast_ether_addr(spec->loc_mac))
                return true;

        /* The IP checks only apply when both fields are being matched */
        if ((spec->match_flags & ip_match) != ip_match)
                return false;

        if (spec->ether_type == htons(ETH_P_IP))
                return !ipv4_is_multicast(spec->loc_host[0]);

        if (spec->ether_type == htons(ETH_P_IPV6))
                return ((const u8 *)spec->loc_host)[0] != 0xff;

        return false;
}

/*
 * Store @spec together with the private @flags in table row @filter_idx.
 * The flags are OR-ed into the pointer value, so both share one word.
 */
static void
efx_mcdi_filter_set_entry(struct efx_mcdi_filter_table *table,
                          unsigned int filter_idx,
                          const struct efx_filter_spec *spec,
                          unsigned int flags)
{
        unsigned long packed = (unsigned long)spec;

        packed |= flags;
        table->entry[filter_idx].spec = packed;
}

/*
 * Fill in the operation code and the match criteria of a FILTER_OP request.
 *
 * @efx:   NIC the filter is for (not referenced by this function)
 * @spec:  generic filter specification to translate
 * @inbuf: MCDI request buffer to populate
 *
 * Selects INSERT for exclusive filters and SUBSCRIBE otherwise, copies
 * every field selected in spec->match_flags into the request, and finally
 * writes the accumulated MATCH_FIELDS bitmask.
 */
static void
efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx,
                                           const struct efx_filter_spec *spec,
                                           efx_dword_t *inbuf)
{
        enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
        u32 match_fields = 0, uc_match, mc_match;

        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
                       efx_mcdi_filter_is_exclusive(spec) ?
                       MC_CMD_FILTER_OP_IN_OP_INSERT :
                       MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE);

        /*
         * Convert match flags and values.  Unlike almost
         * everything else in MCDI, these fields are in
         * network byte order.
         *
         * The bitmask is built with unsigned shifts so that a bit
         * position of 31 cannot shift into the sign bit of an int.
         */
#define COPY_VALUE(value, mcdi_field)                                        \
        do {                                                         \
                match_fields |=                                      \
                        1U << MC_CMD_FILTER_OP_IN_MATCH_ ##          \
                        mcdi_field ## _LBN;                          \
                BUILD_BUG_ON(                                        \
                        MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \
                        sizeof(value));                              \
                memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \
                       &value, sizeof(value));                       \
        } while (0)
#define COPY_FIELD(gen_flag, gen_field, mcdi_field)                          \
        do {                                                         \
                if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) \
                        COPY_VALUE(spec->gen_field, mcdi_field);     \
        } while (0)
        /*
         * Handle encap filters first.  They will always be mismatch
         * (unknown UC or MC) filters
         */
        if (encap_type) {
                /*
                 * ether_type and outer_ip_proto need to be variables
                 * because COPY_VALUE wants to memcpy them
                 */
                __be16 ether_type =
                        htons(encap_type & EFX_ENCAP_FLAG_IPV6 ?
                              ETH_P_IPV6 : ETH_P_IP);
                u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE;
                u8 outer_ip_proto;

                switch (encap_type & EFX_ENCAP_TYPES_MASK) {
                case EFX_ENCAP_TYPE_VXLAN:
                        vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN;
                        fallthrough;
                case EFX_ENCAP_TYPE_GENEVE:
                        COPY_VALUE(ether_type, ETHER_TYPE);
                        outer_ip_proto = IPPROTO_UDP;
                        COPY_VALUE(outer_ip_proto, IP_PROTO);
                        /*
                         * We always need to set the type field, even
                         * though we're not matching on the TNI.
                         */
                        MCDI_POPULATE_DWORD_1(inbuf,
                                FILTER_OP_EXT_IN_VNI_OR_VSID,
                                FILTER_OP_EXT_IN_VNI_TYPE,
                                vni_type);
                        break;
                case EFX_ENCAP_TYPE_NVGRE:
                        COPY_VALUE(ether_type, ETHER_TYPE);
                        outer_ip_proto = IPPROTO_GRE;
                        COPY_VALUE(outer_ip_proto, IP_PROTO);
                        break;
                default:
                        WARN_ON(1);
                }

                /* Mismatch bits refer to the inner frame when encapsulated */
                uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
                mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
        } else {
                uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
                mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
        }

        if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG)
                match_fields |=
                        is_multicast_ether_addr(spec->loc_mac) ?
                        1U << mc_match :
                        1U << uc_match;
        COPY_FIELD(REM_HOST, rem_host, SRC_IP);
        COPY_FIELD(LOC_HOST, loc_host, DST_IP);
        COPY_FIELD(REM_MAC, rem_mac, SRC_MAC);
        COPY_FIELD(REM_PORT, rem_port, SRC_PORT);
        COPY_FIELD(LOC_MAC, loc_mac, DST_MAC);
        COPY_FIELD(LOC_PORT, loc_port, DST_PORT);
        COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE);
        COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN);
        COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN);
        COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO);
#undef COPY_FIELD
#undef COPY_VALUE
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS,
                       match_fields);
}

/*
 * Build a complete FILTER_OP request in @inbuf.
 *
 * @efx:       NIC the filter is for; supplies the default vport ID
 * @spec:      filter being inserted or replaced
 * @inbuf:     request buffer, zeroed here before being populated
 * @handle:    hardware handle of the filter to replace (used only when
 *             @replacing is true)
 * @ctx:       RSS context to use when the filter has the RSS flag set
 * @replacing: build a REPLACE request instead of describing the match
 */
static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
                                      const struct efx_filter_spec *spec,
                                      efx_dword_t *inbuf, u64 handle,
                                      struct efx_rss_context_priv *ctx,
                                      bool replacing)
{
        const bool drop = spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP;
        u32 port_id, rx_dest, rx_queue, rx_mode;
        u32 flags = spec->flags;
        bool use_rss;

        memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);

        /*
         * An RSS filter needs a usable RSS context.  This function cannot
         * return an error, so if the caller failed to supply one we warn
         * and quietly fall back to non-RSS delivery for this filter.
         */
        if ((flags & EFX_FILTER_FLAG_RX_RSS) &&
            (WARN_ON_ONCE(!ctx) ||
             WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)))
                flags &= ~EFX_FILTER_FLAG_RX_RSS;
        use_rss = flags & EFX_FILTER_FLAG_RX_RSS;

        if (!replacing) {
                efx_mcdi_filter_push_prep_set_match_fields(efx, spec, inbuf);
        } else {
                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
                               MC_CMD_FILTER_OP_IN_OP_REPLACE);
                MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle);
        }

        /* A filter may name its own vport, else the NIC's one is used */
        if (flags & EFX_FILTER_FLAG_VPORT_ID)
                port_id = spec->vport_id;
        else
                port_id = efx->vport_id;

        /* Dropped traffic has no meaningful queue, so report queue 0 */
        if (drop) {
                rx_dest = MC_CMD_FILTER_OP_IN_RX_DEST_DROP;
                rx_queue = 0;
        } else {
                rx_dest = MC_CMD_FILTER_OP_IN_RX_DEST_HOST;
                rx_queue = spec->dmaq_id;
        }

        if (use_rss)
                rx_mode = MC_CMD_FILTER_OP_IN_RX_MODE_RSS;
        else
                rx_mode = MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE;

        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, port_id);
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, rx_dest);
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0);
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST,
                       MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, rx_queue);
        MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, rx_mode);
        if (use_rss)
                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
}

/*
 * Send a filter insert/replace request to the MC.
 *
 * @handle is read as the handle to replace (when @replacing) and, on
 * success, overwritten with the handle returned in the response.
 *
 * Returns 0 on success or a negative error code.  Failures are logged
 * unless the filter has hint priority, and -ENOSPC is reported to the
 * caller as -EBUSY.
 */
static int efx_mcdi_filter_push(struct efx_nic *efx,
                                const struct efx_filter_spec *spec, u64 *handle,
                                struct efx_rss_context_priv *ctx, bool replacing)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
        size_t outlen;
        int rc;

        efx_mcdi_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
        rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
                                outbuf, sizeof(outbuf), &outlen);
        if (!rc) {
                *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
                return 0;
        }

        if (spec->priority != EFX_FILTER_PRI_HINT)
                efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf),
                                       outbuf, outlen, rc);

        /* to match efx_farch_filter_insert() */
        return rc == -ENOSPC ? -EBUSY : rc;
}

/*
 * Translate the generic match flags of @spec into the MCDI match-field
 * bitmask that the firmware would use for the same filter.
 *
 * Each generic flag that is handled is cleared from a working copy; a
 * warning is raised (once) if any flag is left over at the end.
 */
static u32 efx_mcdi_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec)
{
        enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
        unsigned int match_flags = spec->match_flags;
        unsigned int uc_match, mc_match;
        u32 mcdi_flags = 0;

        /*
         * Bits are built with unsigned shifts so that a bit position of
         * 31 cannot shift into the sign bit of an int.
         */
#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) {          \
                unsigned int  old_match_flags = match_flags;            \
                match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag;          \
                if (match_flags != old_match_flags)                     \
                        mcdi_flags |=                                   \
                                (1U << ((encap) ?                       \
                                       MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \
                                       mcdi_field ## _LBN :             \
                                       MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\
                                       mcdi_field ## _LBN));            \
        }
        /* inner or outer based on encap type */
        MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type);
        MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type);
        /* always outer */
        MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false);
        MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false);
#undef MAP_FILTER_TO_MCDI_FLAG

        /* special handling for encap type, and mismatch */
        if (encap_type) {
                match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE;
                /* Encap filters always match outer EtherType and IP proto */
                mcdi_flags |=
                        (1U << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
                mcdi_flags |= (1U << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);

                uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
                mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
        } else {
                uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
                mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
        }

        if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) {
                match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG;
                mcdi_flags |=
                        is_multicast_ether_addr(spec->loc_mac) ?
                        1U << mc_match :
                        1U << uc_match;
        }

        /* Did we map them all? */
        WARN_ON_ONCE(match_flags);

        return mcdi_flags;
}

/*
 * Look up the match priority of @spec: the index in the table's list of
 * supported RX match types whose MCDI flags equal those derived from the
 * spec.  Returns -EPROTONOSUPPORT if no listed match type fits.
 */
static int efx_mcdi_filter_pri(struct efx_mcdi_filter_table *table,
                               const struct efx_filter_spec *spec)
{
        const u32 wanted = efx_mcdi_filter_mcdi_flags_from_spec(spec);
        unsigned int pri;

        for (pri = 0; pri < table->rx_match_count; pri++) {
                if (table->rx_match_mcdi_flags[pri] != wanted)
                        continue;
                return pri;
        }

        return -EPROTONOSUPPORT;
}

/*
 * Insert a filter, or replace/renew a matching one, in both the software
 * table and the hardware.
 *
 * @efx:           NIC to insert the filter on
 * @spec:          filter to insert; copied when a new table entry is made
 * @replace_equal: whether an existing filter with the same match and the
 *                 same priority may be replaced
 *
 * The caller must hold efx->filter_sem (a warning is raised otherwise).
 * The table lock is held for write throughout, and the RSS lock is also
 * taken when the filter requests RSS.
 *
 * Returns the filter ID on success, or a negative error code:
 *   -EINVAL          the spec is not a pure RX filter
 *   -EPROTONOSUPPORT the match type is not in the table's supported list
 *   -ENOENT          the requested RSS context was not found
 *   -EOPNOTSUPP      the RSS context has no valid hardware context ID
 *   -EPERM           an equal filter of higher priority exists
 *   -EEXIST          an equal filter of the same priority exists and
 *                    @replace_equal is false
 *   -EBUSY           no usable slot within the search limit
 *   -ENOMEM          allocation of the table entry failed
 *   -EAGAIN          the hardware rejected the filter with -EINVAL while
 *                    efx->must_realloc_vis is set
 * or whatever error the hardware insert or unsubscribe request returned.
 */
static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
                                         struct efx_filter_spec *spec,
                                         bool replace_equal)
{
        DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
        struct efx_rss_context_priv *ctx = NULL;
        struct efx_mcdi_filter_table *table;
        struct efx_filter_spec *saved_spec;
        unsigned int match_pri, hash;
        unsigned int priv_flags;
        bool rss_locked = false;
        bool replacing = false;
        unsigned int depth, i;
        int ins_index = -1;
        bool is_mc_recip;
        s32 rc;

        WARN_ON(!rwsem_is_locked(&efx->filter_sem));
        table = efx->filter_state;
        down_write(&table->lock);

        /* For now, only support RX filters */
        if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
            EFX_FILTER_FLAG_RX) {
                rc = -EINVAL;
                goto out_unlock;
        }

        rc = efx_mcdi_filter_pri(table, spec);
        if (rc < 0)
                goto out_unlock;
        match_pri = rc;

        hash = efx_filter_spec_hash(spec);
        is_mc_recip = efx_filter_is_mc_recipient(spec);
        if (is_mc_recip)
                bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);

        if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
                mutex_lock(&efx->net_dev->ethtool->rss_lock);
                rss_locked = true;
                /* rss_context 0 selects the NIC's default context */
                if (spec->rss_context)
                        ctx = efx_find_rss_context_entry(efx, spec->rss_context);
                else
                        ctx = &efx->rss_context.priv;
                if (!ctx) {
                        rc = -ENOENT;
                        goto out_unlock;
                }
                if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
                        rc = -EOPNOTSUPP;
                        goto out_unlock;
                }
        }

        /* Find any existing filters with the same match tuple or
         * else a free slot to insert at.
         */
        for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
                i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1);
                saved_spec = efx_mcdi_filter_entry_spec(table, i);

                if (!saved_spec) {
                        if (ins_index < 0)
                                ins_index = i;
                } else if (efx_filter_spec_equal(spec, saved_spec)) {
                        if (spec->priority < saved_spec->priority &&
                            spec->priority != EFX_FILTER_PRI_AUTO) {
                                rc = -EPERM;
                                goto out_unlock;
                        }
                        if (!is_mc_recip) {
                                /* This is the only one */
                                if (spec->priority ==
                                    saved_spec->priority &&
                                    !replace_equal) {
                                        rc = -EEXIST;
                                        goto out_unlock;
                                }
                                ins_index = i;
                                break;
                        } else if (spec->priority >
                                   saved_spec->priority ||
                                   (spec->priority ==
                                    saved_spec->priority &&
                                    replace_equal)) {
                                /* Reuse the first such slot; mark any
                                 * further ones for removal afterwards.
                                 */
                                if (ins_index < 0)
                                        ins_index = i;
                                else
                                        __set_bit(depth, mc_rem_map);
                        }
                }
        }

        /* Once we reach the maximum search depth, use the first suitable
         * slot, or return -EBUSY if there was none
         */
        if (ins_index < 0) {
                rc = -EBUSY;
                goto out_unlock;
        }

        /* Create a software table entry if necessary. */
        saved_spec = efx_mcdi_filter_entry_spec(table, ins_index);
        if (saved_spec) {
                if (spec->priority == EFX_FILTER_PRI_AUTO &&
                    saved_spec->priority >= EFX_FILTER_PRI_AUTO) {
                        /* Just make sure it won't be removed */
                        if (saved_spec->priority > EFX_FILTER_PRI_AUTO)
                                saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO;
                        table->entry[ins_index].spec &=
                                ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
                        /* NOTE(review): this path returns the bare table
                         * index, not an ID composed with match_pri.
                         */
                        rc = ins_index;
                        goto out_unlock;
                }
                replacing = true;
                priv_flags = efx_mcdi_filter_entry_flags(table, ins_index);
        } else {
                saved_spec = kmalloc_obj(*spec, GFP_ATOMIC);
                if (!saved_spec) {
                        rc = -ENOMEM;
                        goto out_unlock;
                }
                *saved_spec = *spec;
                priv_flags = 0;
        }
        efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags);

        /* Actually insert the filter on the HW */
        rc = efx_mcdi_filter_push(efx, spec, &table->entry[ins_index].handle,
                                  ctx, replacing);

        if (rc == -EINVAL && efx->must_realloc_vis)
                /* The MC rebooted under us, causing it to reject our filter
                 * insertion as pointing to an invalid VI (spec->dmaq_id).
                 */
                rc = -EAGAIN;

        /* Finalise the software table entry */
        if (rc == 0) {
                if (replacing) {
                        /* Update the fields that may differ */
                        if (saved_spec->priority == EFX_FILTER_PRI_AUTO)
                                saved_spec->flags |=
                                        EFX_FILTER_FLAG_RX_OVER_AUTO;
                        saved_spec->priority = spec->priority;
                        saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO;
                        saved_spec->flags |= spec->flags;
                        saved_spec->rss_context = spec->rss_context;
                        saved_spec->dmaq_id = spec->dmaq_id;
                        saved_spec->vport_id = spec->vport_id;
                }
        } else if (!replacing) {
                kfree(saved_spec);
                saved_spec = NULL;
        } else {
                /* We failed to replace, so the old filter is still present.
                 * Roll back the software table to reflect this.  In fact the
                 * efx_mcdi_filter_set_entry() call below will do the right
                 * thing, so nothing extra is needed here.
                 */
        }
        efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags);

        /* Remove and finalise entries for lower-priority multicast
         * recipients
         */
        if (is_mc_recip) {
                MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);

                memset(inbuf, 0, sizeof(inbuf));

                for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
                        if (!test_bit(depth, mc_rem_map))
                                continue;

                        i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1);
                        saved_spec = efx_mcdi_filter_entry_spec(table, i);
                        priv_flags = efx_mcdi_filter_entry_flags(table, i);

                        /* Once any step has failed, stop talking to the
                         * hardware and leave remaining entries as they are.
                         */
                        if (rc == 0) {
                                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
                                               MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
                                MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
                                               table->entry[i].handle);
                                rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
                                                  inbuf, sizeof(inbuf),
                                                  NULL, 0, NULL);
                        }

                        if (rc == 0) {
                                kfree(saved_spec);
                                saved_spec = NULL;
                                priv_flags = 0;
                        }
                        efx_mcdi_filter_set_entry(table, i, saved_spec,
                                                  priv_flags);
                }
        }

        /* If successful, return the inserted filter ID */
        if (rc == 0)
                rc = efx_mcdi_filter_make_filter_id(match_pri, ins_index);

out_unlock:
        if (rss_locked)
                mutex_unlock(&efx->net_dev->ethtool->rss_lock);
        up_write(&table->lock);
        return rc;
}

/*
 * Insert a filter while holding efx->filter_sem for read.  Thin wrapper
 * around efx_mcdi_filter_insert_locked(); its result is passed through
 * unchanged.
 */
s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec,
                           bool replace_equal)
{
        s32 filter_id;

        down_read(&efx->filter_sem);
        filter_id = efx_mcdi_filter_insert_locked(efx, spec, replace_equal);
        up_read(&efx->filter_sem);

        return filter_id;
}

/*
 * Remove a filter.
 * If !by_index, remove by ID
 * If by_index, remove by index
 * Filter ID may come from userland and must be range-checked.
 * Caller must hold efx->filter_sem for read, and efx->filter_state->lock
 * for write.
 *
 * @priority_mask is a bitmask of (1U << priority) values; only a filter
 * whose priority is in the mask is removed.
 *
 * Returns 0 on success, -ENOENT if there is no such filter (or, when
 * removing by ID, its match priority does not agree with the ID, or its
 * priority is not in the mask), or the error from the hardware request.
 */
static int efx_mcdi_filter_remove_internal(struct efx_nic *efx,
                                           unsigned int priority_mask,
                                           u32 filter_id, bool by_index)
{
        unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id);
        struct efx_mcdi_filter_table *table = efx->filter_state;
        MCDI_DECLARE_BUF(inbuf,
                         MC_CMD_FILTER_OP_IN_HANDLE_OFST +
                         MC_CMD_FILTER_OP_IN_HANDLE_LEN);
        struct efx_filter_spec *spec;
        int rc;

        spec = efx_mcdi_filter_entry_spec(table, filter_idx);
        if (!spec ||
            (!by_index &&
             efx_mcdi_filter_pri(table, spec) !=
             efx_mcdi_filter_get_unsafe_pri(filter_id)))
                return -ENOENT;

        if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO &&
            priority_mask == (1U << EFX_FILTER_PRI_AUTO)) {
                /* Just remove flags */
                spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO;
                table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
                return 0;
        }

        if (!(priority_mask & (1U << spec->priority)))
                return -ENOENT;

        if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) {
                /* Reset to an automatic filter */

                struct efx_filter_spec new_spec = *spec;

                new_spec.priority = EFX_FILTER_PRI_AUTO;
                new_spec.flags = (EFX_FILTER_FLAG_RX |
                                  (efx_rss_active(&efx->rss_context.priv) ?
                                   EFX_FILTER_FLAG_RX_RSS : 0));
                new_spec.dmaq_id = 0;
                new_spec.rss_context = 0;
                rc = efx_mcdi_filter_push(efx, &new_spec,
                                          &table->entry[filter_idx].handle,
                                          &efx->rss_context.priv,
                                          true);

                /* Only adopt the new spec once the hardware accepted it */
                if (rc == 0)
                        *spec = new_spec;
        } else {
                /* Really remove the filter */

                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
                               efx_mcdi_filter_is_exclusive(spec) ?
                               MC_CMD_FILTER_OP_IN_OP_REMOVE :
                               MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
                MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
                               table->entry[filter_idx].handle);
                rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP,
                                        inbuf, sizeof(inbuf), NULL, 0, NULL);

                if ((rc == 0) || (rc == -ENOENT)) {
                        /* Filter removed OK or didn't actually exist */
                        kfree(spec);
                        efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0);
                } else {
                        /* Report the length of the request actually sent */
                        efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
                                               sizeof(inbuf),
                                               NULL, 0, rc);
                }
        }

        return rc;
}

/*
 * Sweep the whole table and remove every automatic filter still carrying
 * the AUTO_OLD flag, i.e. one that was not renewed.  Failures are counted
 * and summarised in the log after the table lock has been dropped.
 */
static void efx_mcdi_filter_remove_old(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        int remove_failed = 0;
        int remove_noent = 0;
        int rc;
        int i;

        down_write(&table->lock);
        for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) {
                if (!(READ_ONCE(table->entry[i].spec) &
                      EFX_EF10_FILTER_FLAG_AUTO_OLD))
                        continue;

                rc = efx_mcdi_filter_remove_internal(efx,
                                1U << EFX_FILTER_PRI_AUTO, i, true);
                switch (rc) {
                case 0:
                        break;
                case -ENOENT:
                        remove_noent++;
                        break;
                default:
                        remove_failed++;
                        break;
                }
        }
        up_write(&table->lock);

        if (remove_failed)
                netif_info(efx, drv, efx->net_dev,
                           "%s: failed to remove %d filters\n",
                           __func__, remove_failed);
        if (remove_noent)
                netif_info(efx, drv, efx->net_dev,
                           "%s: failed to remove %d non-existent filters\n",
                           __func__, remove_noent);
}

/*
 * Remove the filter identified by @filter_id, provided it has exactly the
 * given @priority.  Takes efx->filter_sem for read and the table lock for
 * write around the removal, and returns the removal's result.
 */
int efx_mcdi_filter_remove_safe(struct efx_nic *efx,
                                enum efx_filter_priority priority,
                                u32 filter_id)
{
        const unsigned int priority_mask = 1U << priority;
        struct efx_mcdi_filter_table *table;
        int rc;

        down_read(&efx->filter_sem);
        table = efx->filter_state;

        down_write(&table->lock);
        /* by_index == false: the ID's priority part is checked as well */
        rc = efx_mcdi_filter_remove_internal(efx, priority_mask, filter_id,
                                             false);
        up_write(&table->lock);

        up_read(&efx->filter_sem);
        return rc;
}

/*
 * Remove a filter by table index, discarding the result.  The invalid-ID
 * sentinel is accepted and ignored.
 * Caller must hold efx->filter_sem for read.
 */
static void efx_mcdi_filter_remove_unsafe(struct efx_nic *efx,
                                          enum efx_filter_priority priority,
                                          u32 filter_id)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;

        if (filter_id != EFX_EF10_FILTER_ID_INVALID) {
                down_write(&table->lock);
                efx_mcdi_filter_remove_internal(efx, 1U << priority,
                                                filter_id, true);
                up_write(&table->lock);
        }
}

/*
 * Copy the stored spec of the filter identified by @filter_id into @spec.
 *
 * The lookup succeeds only if the table row is occupied, the stored
 * filter has the requested @priority, and its match priority agrees with
 * the priority part of @filter_id.
 *
 * Returns 0 on success or -ENOENT otherwise.
 */
int efx_mcdi_filter_get_safe(struct efx_nic *efx,
                             enum efx_filter_priority priority,
                             u32 filter_id, struct efx_filter_spec *spec)
{
        unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id);
        const struct efx_filter_spec *saved_spec;
        struct efx_mcdi_filter_table *table;
        int rc = -ENOENT;

        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_read(&table->lock);

        saved_spec = efx_mcdi_filter_entry_spec(table, filter_idx);
        if (!saved_spec)
                goto out_unlock;
        if (saved_spec->priority != priority)
                goto out_unlock;
        if (efx_mcdi_filter_pri(table, saved_spec) !=
            efx_mcdi_filter_get_unsafe_pri(filter_id))
                goto out_unlock;

        *spec = *saved_spec;
        rc = 0;

out_unlock:
        up_read(&table->lock);
        up_read(&efx->filter_sem);
        return rc;
}

/*
 * Insert one exact-match filter per address in the device's unicast or
 * multicast address list (selected by @multicast) for @vlan, recording
 * the resulting table row of each in vlan->uc[] or vlan->mc[].
 *
 * With @rollback set, the first insertion failure removes every filter
 * inserted so far by this call and returns the error.  Without it,
 * failures are skipped (the slot keeps its invalid ID) and the function
 * carries on.
 *
 * When both @multicast and @rollback are set, an Ethernet broadcast
 * filter is inserted as well; if that fails, the address filters are
 * rolled back in the same way.
 *
 * Returns 0 on success (always, when @rollback is false) or a negative
 * error code from the failed insertion.
 */
static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx,
                                            struct efx_mcdi_filter_vlan *vlan,
                                            bool multicast, bool rollback)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct efx_mcdi_dev_addr *addr_list;
        enum efx_filter_flags filter_flags;
        struct efx_filter_spec spec;
        u8 baddr[ETH_ALEN];
        unsigned int i, j;
        int addr_count;
        u16 *ids;
        int rc;

        addr_list = multicast ? table->dev_mc_list : table->dev_uc_list;
        addr_count = multicast ? table->dev_mc_count : table->dev_uc_count;
        ids = multicast ? vlan->mc : vlan->uc;

        filter_flags = 0;
        if (efx_rss_active(&efx->rss_context.priv))
                filter_flags = EFX_FILTER_FLAG_RX_RSS;

        /* Insert/renew filters */
        for (i = 0; i < addr_count; i++) {
                EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
                efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
                efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
                rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
                if (rc >= 0) {
                        ids[i] = efx_mcdi_filter_get_unsafe_id(rc);
                        continue;
                }

                /* Without rollback: keep invalid ID, and carry on */
                if (!rollback)
                        continue;

                netif_info(efx, drv, efx->net_dev,
                           "efx_mcdi_filter_insert failed rc=%d\n",
                           rc);
                goto undo;
        }

        if (!multicast || !rollback)
                return 0;

        /* Also need an Ethernet broadcast filter */
        EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] !=
                             EFX_EF10_FILTER_ID_INVALID);
        efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
        eth_broadcast_addr(baddr);
        efx_filter_set_eth_local(&spec, vlan->vid, baddr);
        rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
        if (rc < 0) {
                netif_warn(efx, drv, efx->net_dev,
                           "Broadcast filter insert failed rc=%d\n", rc);
                goto undo;
        }
        vlan->default_filters[EFX_EF10_BCAST] =
                efx_mcdi_filter_get_unsafe_id(rc);
        return 0;

undo:
        /*
         * Fall back to promiscuous: drop the first i address filters.
         * On the broadcast path the loop above ran to completion, so
         * i equals the address count here.
         */
        for (j = 0; j < i; j++) {
                efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
                                              ids[j]);
                ids[j] = EFX_EF10_FILTER_ID_INVALID;
        }
        return rc;
}

/*
 * Insert a default ("mismatch") filter for @vlan: the unicast default
 * or, if @multicast, the multicast default, optionally restricted to
 * the encapsulation given by @encap_type.
 *
 * On success the table row of the new filter is stored in the matching
 * vlan->default_filters[] slot.  Like every other slot written in this
 * file, the slot holds the row index (efx_mcdi_filter_get_unsafe_id()),
 * not the full filter ID.
 *
 * For the non-encapsulated multicast default, when multicast chaining
 * is off, an Ethernet broadcast filter is inserted too.  If that fails
 * and @rollback is set, the multicast default is removed again and the
 * error returned; without @rollback the failure is only logged.
 *
 * Encapsulated filters are silently skipped (returning 0) when the NIC
 * lacks the VXLAN_NVGRE capability.
 *
 * Returns 0 on success, a negative error code if the default filter
 * (or, with @rollback, the broadcast filter) could not be inserted, or
 * -EINVAL if @encap_type has no default-filter slot.
 */
static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
                                      struct efx_mcdi_filter_vlan *vlan,
                                      enum efx_encap_type encap_type,
                                      bool multicast, bool rollback)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        enum efx_filter_flags filter_flags;
        struct efx_filter_spec spec;
        u8 baddr[ETH_ALEN];
        int rc;
        u16 *id;

        filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0;

        efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);

        if (multicast)
                efx_filter_set_mc_def(&spec);
        else
                efx_filter_set_uc_def(&spec);

        if (encap_type) {
                if (efx_has_cap(efx, VXLAN_NVGRE)) {
                        efx_filter_set_encap_type(&spec, encap_type);
                } else {
                        /*
                         * don't insert encap filters on non-supporting
                         * platforms. ID will be left as INVALID.
                         */
                        return 0;
                }
        }

        if (vlan->vid != EFX_FILTER_VID_UNSPEC)
                efx_filter_set_eth_local(&spec, vlan->vid, NULL);

        rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
        if (rc < 0) {
                const char *um = multicast ? "Multicast" : "Unicast";
                const char *encap_name = "";
                const char *encap_ipv = "";

                if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
                    EFX_ENCAP_TYPE_VXLAN)
                        encap_name = "VXLAN ";
                else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
                         EFX_ENCAP_TYPE_NVGRE)
                        encap_name = "NVGRE ";
                else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
                         EFX_ENCAP_TYPE_GENEVE)
                        encap_name = "GENEVE ";
                if (encap_type & EFX_ENCAP_FLAG_IPV6)
                        encap_ipv = "IPv6 ";
                else if (encap_type)
                        encap_ipv = "IPv4 ";

                /*
                 * unprivileged functions can't insert mismatch filters
                 * for encapsulated or unicast traffic, so downgrade
                 * those warnings to debug.
                 */
                netif_cond_dbg(efx, drv, efx->net_dev,
                               rc == -EPERM && (encap_type || !multicast), warn,
                               "%s%s%s mismatch filter insert failed rc=%d\n",
                               encap_name, encap_ipv, um, rc);
        } else if (multicast) {
                /* mapping from encap types to default filter IDs (multicast) */
                static const enum efx_mcdi_filter_default_filters map[] = {
                        [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF,
                        [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF,
                        [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF,
                        [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF,
                        [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_VXLAN6_MCDEF,
                        [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_NVGRE6_MCDEF,
                        [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_GENEVE6_MCDEF,
                };

                /*
                 * quick bounds check (BCAST result impossible): gaps in
                 * the map are zero-initialised, and zero is BCAST, which
                 * is never a valid result of this lookup.
                 */
                BUILD_BUG_ON(EFX_EF10_BCAST != 0);
                if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
                        WARN_ON(1);
                        return -EINVAL;
                }
                /* then follow map */
                id = &vlan->default_filters[map[encap_type]];

                EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
                *id = efx_mcdi_filter_get_unsafe_id(rc);
                if (!table->mc_chaining && !encap_type) {
                        /* Also need an Ethernet broadcast filter */
                        efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
                                           filter_flags, 0);
                        eth_broadcast_addr(baddr);
                        efx_filter_set_eth_local(&spec, vlan->vid, baddr);
                        rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
                        if (rc < 0) {
                                netif_warn(efx, drv, efx->net_dev,
                                           "Broadcast filter insert failed rc=%d\n",
                                           rc);
                                if (rollback) {
                                        /* Roll back the mc_def filter */
                                        efx_mcdi_filter_remove_unsafe(
                                                        efx, EFX_FILTER_PRI_AUTO,
                                                        *id);
                                        *id = EFX_EF10_FILTER_ID_INVALID;
                                        return rc;
                                }
                        } else {
                                EFX_WARN_ON_PARANOID(
                                        vlan->default_filters[EFX_EF10_BCAST] !=
                                        EFX_EF10_FILTER_ID_INVALID);
                                vlan->default_filters[EFX_EF10_BCAST] =
                                        efx_mcdi_filter_get_unsafe_id(rc);
                        }
                }
                rc = 0;
        } else {
                /* mapping from encap types to default filter IDs (unicast) */
                static const enum efx_mcdi_filter_default_filters map[] = {
                        [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF,
                        [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF,
                        [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF,
                        [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF,
                        [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_VXLAN6_UCDEF,
                        [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_NVGRE6_UCDEF,
                        [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
                                EFX_EF10_GENEVE6_UCDEF,
                };

                /* quick bounds check (BCAST result impossible) */
                BUILD_BUG_ON(EFX_EF10_BCAST != 0);
                if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
                        WARN_ON(1);
                        return -EINVAL;
                }
                /* then follow map */
                id = &vlan->default_filters[map[encap_type]];
                EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
                /*
                 * Store the table row, as the multicast branch does.  The
                 * slot is a u16 and is compared against row indices
                 * elsewhere, so the priority bits of the full filter ID
                 * must not be kept.
                 */
                *id = efx_mcdi_filter_get_unsafe_id(rc);
                rc = 0;
        }
        return rc;
}

/*
 * Caller must hold efx->filter_sem for read if race against
 * efx_mcdi_filter_table_remove() is possible
 */
static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx,
                                              struct efx_mcdi_filter_vlan *vlan)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;

        /*
         * Do not install unspecified VID if VLAN filtering is enabled.
         * Do not install all specified VIDs if VLAN filtering is disabled.
         */
        if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter)
                return;

        /* Insert/renew unicast filters */
        if (table->uc_promisc) {
                efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE,
                                           false, false);
                efx_mcdi_filter_insert_addr_list(efx, vlan, false, false);
        } else {
                /*
                 * If any of the filters failed to insert, fall back to
                 * promiscuous mode - add in the uc_def filter.  But keep
                 * our individual unicast filters.
                 */
                if (efx_mcdi_filter_insert_addr_list(efx, vlan, false, false))
                        efx_mcdi_filter_insert_def(efx, vlan,
                                                   EFX_ENCAP_TYPE_NONE,
                                                   false, false);
        }
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
                                   false, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
                                              EFX_ENCAP_FLAG_IPV6,
                                   false, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
                                   false, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
                                              EFX_ENCAP_FLAG_IPV6,
                                   false, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
                                   false, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
                                              EFX_ENCAP_FLAG_IPV6,
                                   false, false);

        /*
         * Insert/renew multicast filters
         *
         * If changing promiscuous state with cascaded multicast filters, remove
         * old filters first, so that packets are dropped rather than duplicated
         */
        if (table->mc_chaining && table->mc_promisc_last != table->mc_promisc)
                efx_mcdi_filter_remove_old(efx);
        if (table->mc_promisc) {
                if (table->mc_chaining) {
                        /*
                         * If we failed to insert promiscuous filters, rollback
                         * and fall back to individual multicast filters
                         */
                        if (efx_mcdi_filter_insert_def(efx, vlan,
                                                       EFX_ENCAP_TYPE_NONE,
                                                       true, true)) {
                                /* Changing promisc state, so remove old filters */
                                efx_mcdi_filter_remove_old(efx);
                                efx_mcdi_filter_insert_addr_list(efx, vlan,
                                                                 true, false);
                        }
                } else {
                        /*
                         * If we failed to insert promiscuous filters, don't
                         * rollback.  Regardless, also insert the mc_list,
                         * unless it's incomplete due to overflow
                         */
                        efx_mcdi_filter_insert_def(efx, vlan,
                                                   EFX_ENCAP_TYPE_NONE,
                                                   true, false);
                        if (!table->mc_overflow)
                                efx_mcdi_filter_insert_addr_list(efx, vlan,
                                                                 true, false);
                }
        } else {
                /*
                 * If any filters failed to insert, rollback and fall back to
                 * promiscuous mode - mc_def filter and maybe broadcast.  If
                 * that fails, roll back again and insert as many of our
                 * individual multicast filters as we can.
                 */
                if (efx_mcdi_filter_insert_addr_list(efx, vlan, true, true)) {
                        /* Changing promisc state, so remove old filters */
                        if (table->mc_chaining)
                                efx_mcdi_filter_remove_old(efx);
                        if (efx_mcdi_filter_insert_def(efx, vlan,
                                                       EFX_ENCAP_TYPE_NONE,
                                                       true, true))
                                efx_mcdi_filter_insert_addr_list(efx, vlan,
                                                                 true, false);
                }
        }
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
                                   true, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
                                              EFX_ENCAP_FLAG_IPV6,
                                   true, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
                                   true, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
                                              EFX_ENCAP_FLAG_IPV6,
                                   true, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
                                   true, false);
        efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
                                              EFX_ENCAP_FLAG_IPV6,
                                   true, false);
}

/*
 * Remove every RX filter whose priority is at or below @priority,
 * except filters at EFX_FILTER_PRI_AUTO, which are never touched here.
 *
 * Walks all table rows under efx->filter_sem (read) and the table lock
 * (write).  Rows reporting -ENOENT are skipped; any other error stops
 * the walk.
 *
 * Returns 0 on success or the first error other than -ENOENT.
 */
int efx_mcdi_filter_clear_rx(struct efx_nic *efx,
                             enum efx_filter_priority priority)
{
        struct efx_mcdi_filter_table *table;
        unsigned int priority_mask;
        unsigned int row;
        int rc;

        /* Bits 0..priority set, then the AUTO priority bit cleared */
        priority_mask = (1U << (priority + 1)) - 1;
        priority_mask &= ~(1U << EFX_FILTER_PRI_AUTO);

        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_write(&table->lock);

        for (row = 0; row < EFX_MCDI_FILTER_TBL_ROWS; row++) {
                rc = efx_mcdi_filter_remove_internal(efx, priority_mask,
                                                     row, true);
                /* A row with nothing to remove is not an error */
                if (rc == -ENOENT)
                        rc = 0;
                if (rc)
                        break;
        }

        up_write(&table->lock);
        up_read(&efx->filter_sem);
        return rc;
}

u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx,
                                 enum efx_filter_priority priority)
{
        struct efx_mcdi_filter_table *table;
        unsigned int filter_idx;
        s32 count = 0;

        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_read(&table->lock);
        for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
                if (table->entry[filter_idx].spec &&
                    efx_mcdi_filter_entry_spec(table, filter_idx)->priority ==
                    priority)
                        ++count;
        }
        up_read(&table->lock);
        up_read(&efx->filter_sem);
        return count;
}

u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;

        return table->rx_match_count * EFX_MCDI_FILTER_TBL_ROWS * 2;
}

/*
 * Fill @buf with the IDs of all RX filters held at exactly @priority.
 *
 * @buf has room for @size entries.  Takes efx->filter_sem and the table
 * lock for read.
 *
 * Returns the number of IDs written, or -EMSGSIZE if more than @size
 * filters match (in which case @buf has been filled with the first
 * @size of them).
 */
s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx,
                               enum efx_filter_priority priority,
                               u32 *buf, u32 size)
{
        struct efx_mcdi_filter_table *table;
        struct efx_filter_spec *entry;
        unsigned int row;
        s32 count = 0;

        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_read(&table->lock);

        for (row = 0; row < EFX_MCDI_FILTER_TBL_ROWS; row++) {
                entry = efx_mcdi_filter_entry_spec(table, row);
                if (!entry || entry->priority != priority)
                        continue;

                /* Another match but no room left for it */
                if (count == size) {
                        count = -EMSGSIZE;
                        break;
                }

                buf[count] = efx_mcdi_filter_make_filter_id(
                                efx_mcdi_filter_pri(table, entry), row);
                count++;
        }

        up_read(&table->lock);
        up_read(&efx->filter_sem);
        return count;
}

/*
 * Translate a firmware (MCDI) match-field bitmask into the driver's
 * EFX_FILTER_MATCH_* flags.
 *
 * Each known MCDI bit is cleared from @mcdi_flags as it is mapped; if
 * any bit is left over, the mask contains a field the driver does not
 * understand.
 *
 * For @encap filters, EFX_FILTER_MATCH_ENCAP_TYPE is always set, the
 * outer IP_PROTO and ETHER_TYPE bits are accepted without producing a
 * flag, and the remaining flags are taken from the IFRM_* fields.
 *
 * Returns the driver match flags (non-negative), or -EINVAL if
 * @mcdi_flags contains an unmapped bit.
 */
static int efx_mcdi_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags)
{
        int match_flags = 0;

        /*
         * Bit masks are built with 1U so that a field at bit 31 would not
         * shift into the sign bit of a signed int.
         */
#define MAP_FLAG(gen_flag, mcdi_field) do {                             \
                u32 old_mcdi_flags = mcdi_flags;                        \
                mcdi_flags &= ~(1U << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \
                                      mcdi_field ## _LBN);              \
                if (mcdi_flags != old_mcdi_flags)                       \
                        match_flags |= EFX_FILTER_MATCH_ ## gen_flag;   \
        } while (0)

        if (encap) {
                /* encap filters must specify encap type */
                match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE;
                /* and imply ethertype and ip proto */
                mcdi_flags &=
                        ~(1U << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
                mcdi_flags &=
                        ~(1U << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
                /* VLAN tags refer to the outer packet */
                MAP_FLAG(INNER_VID, INNER_VLAN);
                MAP_FLAG(OUTER_VID, OUTER_VLAN);
                /* everything else refers to the inner packet */
                MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST);
                MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST);
                MAP_FLAG(REM_HOST, IFRM_SRC_IP);
                MAP_FLAG(LOC_HOST, IFRM_DST_IP);
                MAP_FLAG(REM_MAC, IFRM_SRC_MAC);
                MAP_FLAG(REM_PORT, IFRM_SRC_PORT);
                MAP_FLAG(LOC_MAC, IFRM_DST_MAC);
                MAP_FLAG(LOC_PORT, IFRM_DST_PORT);
                MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE);
                MAP_FLAG(IP_PROTO, IFRM_IP_PROTO);
        } else {
                MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST);
                MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST);
                MAP_FLAG(REM_HOST, SRC_IP);
                MAP_FLAG(LOC_HOST, DST_IP);
                MAP_FLAG(REM_MAC, SRC_MAC);
                MAP_FLAG(REM_PORT, SRC_PORT);
                MAP_FLAG(LOC_MAC, DST_MAC);
                MAP_FLAG(LOC_PORT, DST_PORT);
                MAP_FLAG(ETHER_TYPE, ETHER_TYPE);
                MAP_FLAG(INNER_VID, INNER_VLAN);
                MAP_FLAG(OUTER_VID, OUTER_VLAN);
                MAP_FLAG(IP_PROTO, IP_PROTO);
        }
#undef MAP_FLAG

        /* Did we map them all? */
        if (mcdi_flags)
                return -EINVAL;

        return match_flags;
}

/*
 * Report whether one of the match types recorded in @table translates
 * (for the given @encap mode) to exactly @match_flags.
 *
 * Returns true if such a match type exists, false otherwise.
 */
bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table,
                                     bool encap,
                                     enum efx_filter_match_flags match_flags)
{
        unsigned int pri = 0;
        int mf;

        while (pri < table->rx_match_count) {
                mf = efx_mcdi_filter_match_flags_from_mcdi(
                                encap, table->rx_match_mcdi_flags[pri]);
                if (mf == match_flags)
                        return true;
                pri++;
        }

        return false;
}

/*
 * Ask the firmware which RX match types it supports (plain or, if
 * @encap, encapsulated) and append those the driver can translate to
 * @table's list of match types, preserving the order in which the
 * firmware reported them.
 *
 * Match types the driver cannot translate are logged at debug level and
 * skipped.
 *
 * Returns 0 on success or the error from the MCDI request.
 */
static int
efx_mcdi_filter_table_probe_matches(struct efx_nic *efx,
                                    struct efx_mcdi_filter_table *table,
                                    bool encap)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX);
        unsigned int pd_match_pri, pd_match_count;
        size_t outlen;
        u32 mcdi_flags;
        int rc;

        /* Find out which RX filter types are supported, and their priorities */
        MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP,
                       encap ?
                       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES :
                       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES);
        rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO,
                          inbuf, sizeof(inbuf), outbuf, sizeof(outbuf),
                          &outlen);
        if (rc)
                return rc;

        pd_match_count = MCDI_VAR_ARRAY_LEN(
                outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES);

        for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) {
                mcdi_flags = MCDI_ARRAY_DWORD(
                                outbuf,
                                GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES,
                                pd_match_pri);
                rc = efx_mcdi_filter_match_flags_from_mcdi(encap, mcdi_flags);
                if (rc < 0) {
                        netif_dbg(efx, probe, efx->net_dev,
                                  "%s: fw flags %#x pri %u not supported in driver\n",
                                  __func__, mcdi_flags, pd_match_pri);
                        continue;
                }

                netif_dbg(efx, probe, efx->net_dev,
                          "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n",
                          __func__, mcdi_flags, pd_match_pri,
                          rc, table->rx_match_count);
                table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags;
                table->rx_match_count++;
        }

        return 0;
}

/*
 * Allocate and initialise the MCDI filter table for @efx and publish it
 * in efx->filter_state.
 *
 * The supported RX match types are probed from the firmware (including
 * encapsulated matches when the NIC has the VXLAN_NVGRE capability).
 * If the NIC advertises VLAN filtering but the firmware lacks the
 * required outer-VID match types, NETIF_F_HW_VLAN_CTAG_FILTER is
 * cleared from the device's feature sets.
 *
 * efx->filter_sem must be held for write; -EINVAL is returned if it is
 * not.  Calling this when a table already exists is a no-op.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error
 * from probing the firmware.
 */
int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
{
        struct net_device *net_dev = efx->net_dev;
        struct efx_mcdi_filter_table *table;
        int rc;

        if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
                return -EINVAL;

        /* already probed */
        if (efx->filter_state)
                return 0;

        table = kzalloc_obj(*table);
        if (!table)
                return -ENOMEM;

        table->mc_chaining = multicast_chaining;
        table->rx_match_count = 0;

        rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
        if (rc)
                goto fail;
        if (efx_has_cap(efx, VXLAN_NVGRE)) {
                rc = efx_mcdi_filter_table_probe_matches(efx, table, true);
                if (rc)
                        goto fail;
        }

        if (efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) {
                /* VLAN filtering needs both of these match types */
                bool vlan_match_ok =
                        efx_mcdi_filter_match_supported(table, false,
                                (EFX_FILTER_MATCH_OUTER_VID |
                                 EFX_FILTER_MATCH_LOC_MAC)) &&
                        efx_mcdi_filter_match_supported(table, false,
                                (EFX_FILTER_MATCH_OUTER_VID |
                                 EFX_FILTER_MATCH_LOC_MAC_IG));

                if (!vlan_match_ok) {
                        netif_info(efx, probe, net_dev,
                                   "VLAN filters are not supported in this firmware variant\n");
                        net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
                        efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
                        net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
                }
        }

        table->entry = vzalloc(array_size(EFX_MCDI_FILTER_TBL_ROWS,
                                          sizeof(*table->entry)));
        if (!table->entry) {
                rc = -ENOMEM;
                goto fail;
        }

        table->mc_promisc_last = false;
        table->vlan_filter =
                !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
        INIT_LIST_HEAD(&table->vlan_list);
        init_rwsem(&table->lock);

        efx->filter_state = table;

        return 0;

fail:
        /* table->entry has not been allocated on any path reaching here */
        kfree(table);
        return rc;
}

/*
 * Flag the filter table so that its filters and RSS contexts are
 * restored later.  Does nothing if no filter table exists.
 */
void efx_mcdi_filter_table_reset_mc_allocations(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;

        if (!table)
                return;

        table->must_restore_filters = true;
        table->must_restore_rss_contexts = true;
}

/*
 * Push every filter held in the software table to the NIC again, if the
 * table has been flagged with must_restore_filters.
 *
 * A filter is dropped from the software table (its spec freed and its
 * row cleared) when its match type is no longer in the supported list,
 * when it uses RSS but its RSS context is missing or was not created,
 * or when pushing it fails.  Any per-VLAN default-filter slot that
 * refers to a dropped row is reset to EFX_EF10_FILTER_ID_INVALID.
 *
 * must_restore_filters is cleared only if no push failed; filters
 * dropped as unsupported do not prevent it from being cleared.
 *
 * Caller must hold efx->filter_sem for read if race against
 * efx_mcdi_filter_table_remove() is possible
 */
void efx_mcdi_filter_table_restore(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        unsigned int invalid_filters = 0, failed = 0;
        struct efx_mcdi_filter_vlan *vlan;
        struct efx_rss_context_priv *ctx;
        struct efx_filter_spec *spec;
        unsigned int filter_idx;
        u32 mcdi_flags;
        int match_pri;
        int rc, i;

        WARN_ON(!rwsem_is_locked(&efx->filter_sem));

        /* Nothing to do without a table or without a pending restore */
        if (!table || !table->must_restore_filters)
                return;

        down_write(&table->lock);
        mutex_lock(&efx->net_dev->ethtool->rss_lock);

        for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
                spec = efx_mcdi_filter_entry_spec(table, filter_idx);
                if (!spec)
                        continue;

                /*
                 * Look the filter's match type up in the supported list;
                 * if it is not there, the filter cannot be restored.
                 */
                mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec);
                match_pri = 0;
                while (match_pri < table->rx_match_count &&
                       table->rx_match_mcdi_flags[match_pri] != mcdi_flags)
                        ++match_pri;
                if (match_pri >= table->rx_match_count) {
                        invalid_filters++;
                        goto not_restored;
                }
                /* A zero rss_context selects the NIC's default RSS context */
                if (spec->rss_context)
                        ctx = efx_find_rss_context_entry(efx, spec->rss_context);
                else
                        ctx = &efx->rss_context.priv;
                /* The context is only validated for filters that use RSS */
                if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
                        if (!ctx) {
                                netif_warn(efx, drv, efx->net_dev,
                                           "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
                                           spec->rss_context);
                                invalid_filters++;
                                goto not_restored;
                        }
                        if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
                                netif_warn(efx, drv, efx->net_dev,
                                           "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
                                           spec->rss_context);
                                invalid_filters++;
                                goto not_restored;
                        }
                }

                rc = efx_mcdi_filter_push(efx, spec,
                                          &table->entry[filter_idx].handle,
                                          ctx, false);
                if (rc)
                        failed++;

                /*
                 * Drop path: entered on push failure, and also jumped into
                 * from the validation checks above via not_restored.
                 */
                if (rc) {
not_restored:
                        /* Forget this row in every VLAN's default filters */
                        list_for_each_entry(vlan, &table->vlan_list, list)
                                for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i)
                                        if (vlan->default_filters[i] == filter_idx)
                                                vlan->default_filters[i] =
                                                        EFX_EF10_FILTER_ID_INVALID;

                        kfree(spec);
                        efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0);
                }
        }

        mutex_unlock(&efx->net_dev->ethtool->rss_lock);
        up_write(&table->lock);

        /*
         * This can happen validly if the MC's capabilities have changed, so
         * is not an error.
         */
        if (invalid_filters)
                netif_dbg(efx, drv, efx->net_dev,
                          "Did not restore %u filters that are now unsupported.\n",
                          invalid_filters);

        /* Leave the restore flag set after a push failure */
        if (failed)
                netif_err(efx, hw, efx->net_dev,
                          "unable to restore %u filters\n", failed);
        else
                table->must_restore_filters = false;
}

/*
 * Remove every filter held in the software table from the hardware and
 * release the associated specs.
 *
 * Does nothing if there is no filter table.  The per-VLAN state is torn
 * down first via efx_mcdi_filter_cleanup_vlans().  A failure to remove an
 * individual filter from the hardware is only logged; its spec is freed
 * regardless.
 */
void efx_mcdi_filter_table_down(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
        struct efx_filter_spec *spec;
        unsigned int filter_idx;
        int rc;

        if (!table)
                return;

        efx_mcdi_filter_cleanup_vlans(efx);

        for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
                spec = efx_mcdi_filter_entry_spec(table, filter_idx);
                if (!spec)
                        continue;

                /* Exclusive filters are removed; shared ones unsubscribed */
                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
                               efx_mcdi_filter_is_exclusive(spec) ?
                               MC_CMD_FILTER_OP_IN_OP_REMOVE :
                               MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
                MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
                               table->entry[filter_idx].handle);
                rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf,
                                        sizeof(inbuf), NULL, 0, NULL);
                if (rc)
                        netif_info(efx, drv, efx->net_dev,
                                   "%s: filter %04x remove failed\n",
                                   __func__, filter_idx);
                kfree(spec);
                /*
                 * Forget the freed spec (and its flags) so that the entry
                 * does not keep a dangling pointer: a later walk of the
                 * table, including a second call to this function, must
                 * see the slot as empty rather than free it again.
                 */
                table->entry[filter_idx].spec = 0;
        }
}

/*
 * Take the filter table down, detach it from @efx and free it.
 *
 * efx->filter_state is cleared unconditionally once the filters have been
 * taken down.
 */
void efx_mcdi_filter_table_remove(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;

        efx_mcdi_filter_table_down(efx);
        efx->filter_state = NULL;

        /*
         * Freeing is only safe when efx->filter_sem is held for write,
         * since otherwise other users might still be looking at the table.
         * If the assertion fails we have already WARNed; the memory is
         * deliberately leaked and the filter table state may end up
         * inconsistent.  This should never actually happen.
         */
        if (efx_rwsem_assert_write_locked(&efx->filter_sem) && table) {
                vfree(table->entry);
                kfree(table);
        }
}

/*
 * Flag the filter referenced by *@id as old and invalidate *@id.
 * IDs that are already EFX_EF10_FILTER_ID_INVALID are left untouched.
 * The filter table lock is asserted to be held for write.
 */
static void efx_mcdi_filter_mark_one_old(struct efx_nic *efx, uint16_t *id)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        unsigned int row;

        efx_rwsem_assert_write_locked(&table->lock);

        if (*id == EFX_EF10_FILTER_ID_INVALID)
                return;

        row = efx_mcdi_filter_get_unsafe_id(*id);
        /* An ID pointing at an empty row is unexpected; note it for debug */
        if (!table->entry[row].spec)
                netif_dbg(efx, drv, efx->net_dev,
                          "marked null spec old %04x:%04x\n", *id,
                          row);
        table->entry[row].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD;
        *id = EFX_EF10_FILTER_ID_INVALID;
}

/*
 * Mark as old the filters tracked by @vlan: the first dev_uc_count unicast
 * IDs, the first dev_mc_count multicast IDs and all default filter IDs.
 */
static void _efx_mcdi_filter_vlan_mark_old(struct efx_nic *efx,
                                           struct efx_mcdi_filter_vlan *vlan)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        unsigned int n;

        n = 0;
        while (n < table->dev_uc_count) {
                efx_mcdi_filter_mark_one_old(efx, &vlan->uc[n]);
                n++;
        }

        n = 0;
        while (n < table->dev_mc_count) {
                efx_mcdi_filter_mark_one_old(efx, &vlan->mc[n]);
                n++;
        }

        n = 0;
        while (n < EFX_EF10_NUM_DEFAULT_FILTERS) {
                efx_mcdi_filter_mark_one_old(efx, &vlan->default_filters[n]);
                n++;
        }
}

/*
 * Mark old filters that may need to be removed.
 * Caller must hold efx->filter_sem for read if race against
 * efx_mcdi_filter_table_remove() is possible
 */
static void efx_mcdi_filter_mark_old(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct efx_mcdi_filter_vlan *vlan;

        down_write(&table->lock);
        list_for_each_entry(vlan, &table->vlan_list, list)
                _efx_mcdi_filter_vlan_mark_old(efx, vlan);
        up_write(&table->lock);
}

/*
 * Start tracking VLAN @vid in the filter table.
 *
 * A new per-VLAN record is allocated with all of its filter IDs invalid and
 * appended to the table's VLAN list.  If the device is registered, the RX
 * mode filters for the new VLAN are synced straight away.
 *
 * Returns 0 on success, -EINVAL if efx->filter_sem is not held for write,
 * -EALREADY if @vid is already present, or -ENOMEM if allocation fails.
 */
int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct efx_mcdi_filter_vlan *new_vlan;
        unsigned int slot;

        if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
                return -EINVAL;

        if (WARN_ON(efx_mcdi_filter_find_vlan(efx, vid))) {
                netif_err(efx, drv, efx->net_dev,
                          "VLAN %u already added\n", vid);
                return -EALREADY;
        }

        new_vlan = kzalloc_obj(*new_vlan);
        if (!new_vlan)
                return -ENOMEM;

        new_vlan->vid = vid;

        /* No filters exist for this VLAN yet */
        for (slot = 0; slot < ARRAY_SIZE(new_vlan->uc); slot++)
                new_vlan->uc[slot] = EFX_EF10_FILTER_ID_INVALID;

        for (slot = 0; slot < ARRAY_SIZE(new_vlan->mc); slot++)
                new_vlan->mc[slot] = EFX_EF10_FILTER_ID_INVALID;

        for (slot = 0; slot < EFX_EF10_NUM_DEFAULT_FILTERS; slot++)
                new_vlan->default_filters[slot] = EFX_EF10_FILTER_ID_INVALID;

        list_add_tail(&new_vlan->list, &table->vlan_list);

        if (efx_dev_registered(efx))
                efx_mcdi_filter_vlan_sync_rx_mode(efx, new_vlan);

        return 0;
}

/*
 * Unlink @vlan from the table's VLAN list, remove the automatic filters it
 * references and free it.
 *
 * Nothing is done unless efx->filter_sem is held for write; see the
 * comment in efx_mcdi_filter_table_remove().
 */
static void efx_mcdi_filter_del_vlan_internal(struct efx_nic *efx,
                                              struct efx_mcdi_filter_vlan *vlan)
{
        unsigned int n;

        if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
                return;

        list_del(&vlan->list);

        n = 0;
        while (n < ARRAY_SIZE(vlan->uc)) {
                efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
                                              vlan->uc[n]);
                n++;
        }

        n = 0;
        while (n < ARRAY_SIZE(vlan->mc)) {
                efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
                                              vlan->mc[n]);
                n++;
        }

        /* Default filters are only removed where an ID is actually set */
        for (n = 0; n < EFX_EF10_NUM_DEFAULT_FILTERS; n++) {
                if (vlan->default_filters[n] == EFX_EF10_FILTER_ID_INVALID)
                        continue;
                efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
                                              vlan->default_filters[n]);
        }

        kfree(vlan);
}

/*
 * Stop tracking VLAN @vid and remove its filters.
 *
 * Nothing is done unless efx->filter_sem is held for write; see the
 * comment in efx_mcdi_filter_table_remove().  An unknown @vid is reported
 * as an error.
 */
void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid)
{
        struct efx_mcdi_filter_vlan *found;

        if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
                return;

        found = efx_mcdi_filter_find_vlan(efx, vid);
        if (found) {
                efx_mcdi_filter_del_vlan_internal(efx, found);
                return;
        }

        netif_err(efx, drv, efx->net_dev,
                  "VLAN %u not found in filter state\n", vid);
}

/*
 * Look up the per-VLAN filter record for @vid.
 *
 * Warns if efx->filter_sem is not held.  Returns the matching record from
 * the table's VLAN list, or NULL if @vid is not present.
 */
struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx,
                                                       u16 vid)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct efx_mcdi_filter_vlan *cur, *match = NULL;

        WARN_ON(!rwsem_is_locked(&efx->filter_sem));

        list_for_each_entry(cur, &table->vlan_list, list) {
                if (cur->vid != vid)
                        continue;
                match = cur;
                break;
        }

        return match;
}

void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct efx_mcdi_filter_vlan *vlan, *next_vlan;

        /* See comment in efx_mcdi_filter_table_remove() */
        if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
                return;

        if (!table)
                return;

        list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list)
                efx_mcdi_filter_del_vlan_internal(efx, vlan);
}

/*
 * Rebuild table->dev_uc_list from the net device.
 *
 * Entry 0 is always the device's own address; the device's unicast list
 * follows.  If the list does not fit in EFX_EF10_FILTER_DEV_UC_MAX entries,
 * copying stops and unicast promiscuous mode is requested instead.
 */
static void efx_mcdi_filter_uc_addr_list(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct net_device *net_dev = efx->net_dev;
        struct netdev_hw_addr *ha;
        unsigned int count = 0;

        table->uc_promisc = (net_dev->flags & IFF_PROMISC) != 0;

        ether_addr_copy(table->dev_uc_list[count].addr, net_dev->dev_addr);
        count++;

        netdev_for_each_uc_addr(ha, net_dev) {
                if (count >= EFX_EF10_FILTER_DEV_UC_MAX) {
                        /* Too many addresses to filter individually */
                        table->uc_promisc = true;
                        break;
                }
                ether_addr_copy(table->dev_uc_list[count].addr, ha->addr);
                count++;
        }

        table->dev_uc_count = count;
}

/*
 * Rebuild table->dev_mc_list from the net device's multicast list.
 *
 * Multicast promiscuous mode is requested if the device has IFF_PROMISC or
 * IFF_ALLMULTI set, or if the list does not fit in
 * EFX_EF10_FILTER_DEV_MC_MAX entries; the latter case is also recorded in
 * table->mc_overflow.
 */
static void efx_mcdi_filter_mc_addr_list(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct net_device *net_dev = efx->net_dev;
        struct netdev_hw_addr *ha;
        unsigned int count = 0;

        table->mc_overflow = false;
        table->mc_promisc =
                (net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) != 0;

        netdev_for_each_mc_addr(ha, net_dev) {
                if (count >= EFX_EF10_FILTER_DEV_MC_MAX) {
                        /* Too many addresses to filter individually */
                        table->mc_promisc = true;
                        table->mc_overflow = true;
                        break;
                }
                ether_addr_copy(table->dev_mc_list[count].addr, ha->addr);
                count++;
        }

        table->dev_mc_count = count;
}

/*
 * Caller must hold efx->filter_sem for read if race against
 * efx_mcdi_filter_table_remove() is possible
 */
void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct net_device *net_dev = efx->net_dev;
        struct efx_mcdi_filter_vlan *vlan;
        bool vlan_filter;

        if (!efx_dev_registered(efx))
                return;

        if (!table)
                return;

        efx_mcdi_filter_mark_old(efx);

        /*
         * Copy/convert the address lists; add the primary station
         * address and broadcast address
         */
        netif_addr_lock_bh(net_dev);
        efx_mcdi_filter_uc_addr_list(efx);
        efx_mcdi_filter_mc_addr_list(efx);
        netif_addr_unlock_bh(net_dev);

        /*
         * If VLAN filtering changes, all old filters are finally removed.
         * Do it in advance to avoid conflicts for unicast untagged and
         * VLAN 0 tagged filters.
         */
        vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
        if (table->vlan_filter != vlan_filter) {
                table->vlan_filter = vlan_filter;
                efx_mcdi_filter_remove_old(efx);
        }

        list_for_each_entry(vlan, &table->vlan_list, list)
                efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan);

        efx_mcdi_filter_remove_old(efx);
        table->mc_promisc_last = table->mc_promisc;
}

#ifdef CONFIG_RFS_ACCEL

/*
 * Consider the filter at @filter_idx for ARFS expiry and remove it if it
 * may be expired.
 *
 * efx->filter_sem is held for read and the filter table lock for write for
 * the whole call; efx->rps_hash_lock is taken around accesses to the ARFS
 * rule table.
 *
 * Returns true if the slot holds no EFX_FILTER_PRI_HINT filter or if the
 * filter was successfully removed, and false otherwise.
 */
bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
                                    unsigned int filter_idx)
{
        struct efx_filter_spec *spec, saved_spec;
        struct efx_mcdi_filter_table *table;
        struct efx_arfs_rule *rule = NULL;
        bool ret = true, force = false;
        u16 arfs_id;

        down_read(&efx->filter_sem);
        table = efx->filter_state;
        down_write(&table->lock);
        spec = efx_mcdi_filter_entry_spec(table, filter_idx);

        /* Only filters at hint priority are candidates for expiry */
        if (!spec || spec->priority != EFX_FILTER_PRI_HINT)
                goto out_unlock;

        spin_lock_bh(&efx->rps_hash_lock);
        if (!efx->rps_hash_table) {
                /* In the absence of the table, we always return 0 to ARFS. */
                arfs_id = 0;
        } else {
                rule = efx_rps_hash_find(efx, spec);
                if (!rule)
                        /* ARFS table doesn't know of this filter, so remove it */
                        goto expire;
                arfs_id = rule->arfs_id;
                ret = efx_rps_check_rule(rule, filter_idx, &force);
                /* A forced expiry skips the rps_may_expire_flow() check */
                if (force)
                        goto expire;
                if (!ret) {
                        spin_unlock_bh(&efx->rps_hash_lock);
                        goto out_unlock;
                }
        }
        if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id))
                ret = false;
        else if (rule)
                rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
expire:
        saved_spec = *spec; /* remove operation will kfree spec */
        spin_unlock_bh(&efx->rps_hash_lock);
        /*
         * At this point (since we dropped the lock), another thread might queue
         * up a fresh insertion request (but the actual insertion will be held
         * up by our possession of the filter table lock).  In that case, it
         * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that
         * the rule is not removed by efx_rps_hash_del() below.
         */
        if (ret)
                ret = efx_mcdi_filter_remove_internal(efx, 1U << spec->priority,
                                                      filter_idx, true) == 0;
        /*
         * While we can't safely dereference rule (we dropped the lock), we can
         * still test it for NULL.
         */
        if (ret && rule) {
                /* Expiring, so remove entry from ARFS table */
                spin_lock_bh(&efx->rps_hash_lock);
                efx_rps_hash_del(efx, &saved_spec);
                spin_unlock_bh(&efx->rps_hash_lock);
        }
out_unlock:
        up_write(&table->lock);
        up_read(&efx->filter_sem);
        return ret;
}

#endif /* CONFIG_RFS_ACCEL */

/* RSS mode bits selecting the source and destination addresses for hashing */
#define RSS_MODE_HASH_ADDRS     (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
                                 1 << RSS_MODE_HASH_DST_ADDR_LBN)
/* RSS mode bits selecting the source and destination ports for hashing */
#define RSS_MODE_HASH_PORTS     (1 << RSS_MODE_HASH_SRC_PORT_LBN |\
                                 1 << RSS_MODE_HASH_DST_PORT_LBN)
/*
 * The flags value we believe a newly-allocated RSS context to have:
 * Toeplitz hashing enabled for IPv4, TCPv4, IPv6 and TCPv6, with TCP hashed
 * on addresses and ports, and UDP and other IP hashed on addresses only.
 */
#define RSS_CONTEXT_FLAGS_DEFAULT       (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
                                         1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
                                         1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
                                         1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
                                         (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
                                         RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
                                         RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
                                         (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
                                         RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
                                         RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)

/*
 * Read the flags of RSS context @context into *@flags.
 *
 * This works around a firmware bug (sfc bug 61952) in which the flags field
 * of the MC_CMD_RSS_CONTEXT_GET_FLAGS response is never written, so that it
 * holds whatever the MCDI buffer contained beforehand.  All affected
 * firmware versions share the same default flags for a newly-allocated RSS
 * context, and just after allocation is the only time we want the flags.
 * The response also has a 32-bit hole where the request carries the context
 * ID.  So the request is sent in an overlength buffer whose flags field is
 * pre-filled with the expected default: buggy firmware leaves that value in
 * place and we still read the right answer.
 *
 * Consequently this function must NOT be used when the flags might differ
 * from their defaults - it is ONLY reliably correct for a newly-allocated
 * RSS context.
 *
 * Returns 0 on success, -EIO if the response is too short, or the error
 * from the MCDI call.
 */
static int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context,
                                          u32 *flags)
{
        /* The request buffer is deliberately as long as the response */
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
        size_t outlen;
        int rc;

        /* The flags must sit directly after the request's context ID */
        BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
                       RSS_CONTEXT_FLAGS_DEFAULT);

        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
                          sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
        if (rc != 0)
                return rc;

        if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
                return -EIO;

        *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
        return 0;
}

/*
 * Try to switch RSS context @ctx to 4-tuple hashing for UDP.
 *
 * The defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so all
 * that is needed is to add the port bits to the UDP modes of both IP
 * versions.  Any failure leaves the context at its default hash settings,
 * which is safe but may slightly reduce performance.
 */
static void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
                                           struct efx_rss_context_priv *ctx)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
        u32 flags;
        int rc;

        BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);

        rc = efx_mcdi_get_rss_context_flags(efx, ctx->context_id, &flags);
        if (rc != 0)
                return;

        flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
        flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
                       ctx->context_id);
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);

        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf,
                          sizeof(inbuf), NULL, 0, NULL);
        /* Only record UDP 4-tuple as enabled if the firmware accepted it */
        if (rc == 0)
                ctx->rx_hash_udp_4tuple = true;
}

/*
 * Allocate an RSS context on the NIC and store its ID in @ctx.
 *
 * An exclusive context spreads over efx->rss_spread queues.  A shared one
 * spreads over efx->rss_spread rounded down to a power of two and capped at
 * EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE; if that comes to a single queue, no
 * context is allocated and @ctx is marked invalid.
 *
 * If @context_size is non-NULL it receives the number of queues used.
 *
 * Returns 0 on success, -EOPNOTSUPP if the NIC has the RX_RSS_LIMITED
 * capability, -EIO if the response is too short, or the error from the
 * MCDI call.
 */
static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive,
                                             struct efx_rss_context_priv *ctx,
                                             unsigned *context_size)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
        unsigned rss_spread;
        u32 alloc_type;
        size_t outlen;
        int rc;

        if (exclusive) {
                alloc_type = MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE;
                rss_spread = efx->rss_spread;
        } else {
                alloc_type = MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED;
                rss_spread = min(rounddown_pow_of_two(efx->rss_spread),
                                 EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);

                /* A shared spread of one queue needs no RSS context */
                if (rss_spread == 1) {
                        ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
                        if (context_size)
                                *context_size = 1;
                        return 0;
                }
        }

        if (efx_has_cap(efx, RX_RSS_LIMITED))
                return -EOPNOTSUPP;

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
                       efx->vport_id);
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type);
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread);

        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf),
                          outbuf, sizeof(outbuf), &outlen);
        if (rc)
                return rc;
        if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
                return -EIO;

        ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
        if (context_size)
                *context_size = rss_spread;

        if (efx_has_cap(efx, ADDITIONAL_RSS_MODES))
                efx_mcdi_set_rss_context_flags(efx, ctx);

        return 0;
}

/*
 * Ask the firmware to free RSS context @context.
 * Returns the result of the MC_CMD_RSS_CONTEXT_FREE MCDI call.
 */
static int efx_mcdi_filter_free_rss_context(struct efx_nic *efx, u32 context)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
        int rc;

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID, context);

        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
                          NULL, 0, NULL);
        return rc;
}

/*
 * Program the indirection table and then the hash key of RSS context
 * @context.
 *
 * @rx_indir_table and @key are plain pointers, so their lengths cannot be
 * taken from them; they are expected to match the lengths of
 * efx->rss_context.rx_indir_table and efx->rss_context.rx_hash_key, which
 * are what the copy loops iterate over.
 *
 * Returns 0 on success or the error from the first failing MCDI call; the
 * key is not sent if setting the table fails.
 */
static int efx_mcdi_filter_populate_rss_table(struct efx_nic *efx, u32 context,
                                       const u32 *rx_indir_table, const u8 *key)
{
        MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN);
        MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN);
        u8 *dst;
        int n, rc;

        BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
                     MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
        BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
                     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);

        /* Indirection table: each entry is narrowed to a single byte */
        MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
                       context);
        dst = MCDI_PTR(tablebuf, RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE);
        for (n = 0; n < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++n)
                dst[n] = (u8) rx_indir_table[n];

        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf,
                          sizeof(tablebuf), NULL, 0, NULL);
        if (rc)
                return rc;

        /* Hash key */
        MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
                       context);
        dst = MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY);
        for (n = 0; n < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++n)
                dst[n] = key[n];

        return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
                            sizeof(keybuf), NULL, 0, NULL);
}

/*
 * Free the default RSS context on the NIC, if there is one, and mark it
 * invalid.  A failure to free it triggers a warning but the context is
 * still marked invalid.
 */
void efx_mcdi_rx_free_indir_table(struct efx_nic *efx)
{
        if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID)
                WARN_ON(efx_mcdi_filter_free_rss_context(efx,
                                efx->rss_context.priv.context_id) != 0);

        efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
}

/*
 * Allocate a shared RSS context for the default RSS configuration and reset
 * the software indirection table to its default contents.
 *
 * If @context_size is non-NULL it receives the size of the context.
 * Returns 0 on success or the error from the allocation.
 */
static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx,
                                              unsigned *context_size)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        int rc;

        rc = efx_mcdi_filter_alloc_rss_context(efx, false,
                                               &efx->rss_context.priv,
                                               context_size);
        if (rc)
                return rc;

        table->rx_rss_context_exclusive = false;
        efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table);

        return 0;
}

/*
 * Push @rx_indir_table and @key to an exclusive RSS context used as the
 * default RSS configuration.
 *
 * A new exclusive context is allocated if there is no current context or
 * the current one is not exclusive.  On success the previous context (if it
 * was replaced) is freed and the table and key are copied into
 * efx->rss_context.  On failure the previous context ID is kept.
 *
 * Returns 0 on success or a negative error code.  -EOPNOTSUPP from the
 * allocation is returned without logging an error.
 */
static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
                                                 const u32 *rx_indir_table,
                                                 const u8 *key)
{
        u32 old_rx_rss_context = efx->rss_context.priv.context_id;
        struct efx_mcdi_filter_table *table = efx->filter_state;
        int rc;

        if (efx->rss_context.priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
            !table->rx_rss_context_exclusive) {
                rc = efx_mcdi_filter_alloc_rss_context(efx, true,
                                                       &efx->rss_context.priv,
                                                       NULL);
                if (rc == -EOPNOTSUPP)
                        return rc;
                else if (rc != 0)
                        goto fail1;
        }

        rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.priv.context_id,
                                                rx_indir_table, key);
        if (rc != 0)
                goto fail2;

        /* The new context is in use; release the one it replaced, if any */
        if (efx->rss_context.priv.context_id != old_rx_rss_context &&
            old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID)
                WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0);
        table->rx_rss_context_exclusive = true;
        /* Record what was pushed, unless the caller passed our own copies */
        if (rx_indir_table != efx->rss_context.rx_indir_table)
                memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
                       sizeof(efx->rss_context.rx_indir_table));
        if (key != efx->rss_context.rx_hash_key)
                memcpy(efx->rss_context.rx_hash_key, key,
                       efx->type->rx_hash_key_size);

        return 0;

fail2:
        /* Free a context allocated above and go back to the previous one */
        if (old_rx_rss_context != efx->rss_context.priv.context_id) {
                WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id) != 0);
                efx->rss_context.priv.context_id = old_rx_rss_context;
        }
fail1:
        netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
        return rc;
}

/*
 * Push the configuration of a custom RSS context to the NIC, or delete the
 * context from the NIC if @delete is set.
 *
 * When pushing, an exclusive context is allocated first if @ctx does not
 * have one yet, and @rx_indir_table and @key are then programmed into it.
 * When deleting, a context that is not on the NIC needs no action.
 *
 * Warns if the ethtool rss_lock of the net device is not held.
 * Returns 0 on success or a negative error code.
 */
int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx,
                                        struct efx_rss_context_priv *ctx,
                                        const u32 *rx_indir_table,
                                        const u8 *key, bool delete)
{
        bool in_hw;
        int rc;

        WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));

        in_hw = ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID;

        if (delete) {
                /* already wasn't in HW, nothing to do */
                if (!in_hw)
                        return 0;
                /* Delete this context */
                return efx_mcdi_filter_free_rss_context(efx, ctx->context_id);
        }

        if (!in_hw) {
                rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL);
                if (rc)
                        return rc;
        }

        return efx_mcdi_filter_populate_rss_table(efx, ctx->context_id,
                                                  rx_indir_table, key);
}

int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
                                        struct efx_rss_context *ctx)
{
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
        MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
        MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
        size_t outlen;
        int rc, i;

        WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));

        BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
                     MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);

        if (ctx->priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
                return -ENOENT;

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
                       ctx->priv.context_id);
        BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
                     MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
                          tablebuf, sizeof(tablebuf), &outlen);
        if (rc != 0)
                return rc;

        if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
                return -EIO;

        for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
                ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
                                RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];

        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
                       ctx->priv.context_id);
        BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
                     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
                          keybuf, sizeof(keybuf), &outlen);
        if (rc != 0)
                return rc;

        if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
                return -EIO;

        for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
                ctx->rx_hash_key[i] = MCDI_PTR(
                                keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];

        return 0;
}

/*
 * Read the default RSS context's configuration (efx->rss_context) back from
 * the NIC.  Returns whatever efx_mcdi_rx_pull_rss_context_config() returns.
 */
int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx)
{
        return efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context);
}

/*
 * Re-create on the NIC every custom RSS context known to ethtool, if the
 * filter table is flagged as needing its RSS contexts restored.
 *
 * Each context is marked as no longer present on the NIC and then pushed
 * again.  A failure is only warned about; the remaining contexts are still
 * attempted and the restore flag is cleared in any case.
 *
 * Warns if the ethtool rss_lock of the net device is not held.
 */
void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx)
{
        struct efx_mcdi_filter_table *table = efx->filter_state;
        struct ethtool_rxfh_context *ctx;
        unsigned long context;
        int rc;

        WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));

        if (!table->must_restore_rss_contexts)
                return;

        xa_for_each(&efx->net_dev->ethtool->rss_ctx, context, ctx) {
                struct efx_rss_context_priv *priv =
                        ethtool_rxfh_context_priv(ctx);

                /* previous NIC RSS context is gone */
                priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;

                /* so try to allocate a new one */
                rc = efx_mcdi_rx_push_rss_context_config(efx, priv,
                                        ethtool_rxfh_context_indir(ctx),
                                        ethtool_rxfh_context_key(ctx),
                                        false);
                if (!rc)
                        continue;

                netif_warn(efx, probe, efx->net_dev,
                           "failed to restore RSS context %lu, rc=%d"
                           "; RSS filters may fail to be applied\n",
                           context, rc);
        }

        table->must_restore_rss_contexts = false;
}

/*
 * Push the default RSS configuration to the NIC.
 *
 * Nothing is done if the RSS spread is a single queue.  A NULL @key means
 * the currently stored hash key.  An exclusive RSS context is tried first;
 * if that fails with -ENOBUFS and the request is not user-initiated, a
 * shared context is allocated instead and a message describes how the
 * result differs from what was asked for.
 *
 * Returns 0 on success or a negative error code.
 */
int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
                                   const u32 *rx_indir_table,
                                   const u8 *key)
{
        unsigned context_size;
        bool mismatch = false;
        size_t n;
        int rc;

        if (efx->rss_spread == 1)
                return 0;

        if (!key)
                key = efx->rss_context.rx_hash_key;

        rc = efx_mcdi_filter_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
        if (rc != -ENOBUFS || user)
                return rc;

        /* Does the requested table differ from the default one? */
        for (n = 0; n < ARRAY_SIZE(efx->rss_context.rx_indir_table); n++) {
                if (rx_indir_table[n] !=
                    ethtool_rxfh_indir_default(n, efx->rss_spread)) {
                        mismatch = true;
                        break;
                }
        }

        rc = efx_mcdi_filter_rx_push_shared_rss_config(efx, &context_size);
        if (rc != 0)
                return rc;

        if (context_size != efx->rss_spread) {
                netif_warn(efx, probe, efx->net_dev,
                           "Could not allocate an exclusive RSS"
                           " context; allocated a shared one of"
                           " different size."
                           " Wanted %u, got %u.\n",
                           efx->rss_spread, context_size);
        } else if (mismatch) {
                netif_warn(efx, probe, efx->net_dev,
                           "Could not allocate an exclusive RSS"
                           " context; allocated a shared one but"
                           " could not apply custom"
                           " indirection.\n");
        } else {
                netif_info(efx, probe, efx->net_dev,
                           "Could not allocate an exclusive RSS"
                           " context; allocated a shared one.\n");
        }

        return rc;
}

/*
 * Push the default RSS configuration for a VF.
 *
 * User-initiated changes are rejected with -EOPNOTSUPP, and the supplied
 * table and key are not used.  If no RSS context exists yet, a shared one
 * is allocated; otherwise there is nothing to do.
 */
int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
                                   const u32 *rx_indir_table
                                   __attribute__ ((unused)),
                                   const u8 *key
                                   __attribute__ ((unused)))
{
        if (user)
                return -EOPNOTSUPP;

        if (efx->rss_context.priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
                return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL);

        return 0;
}

/*
 * Change the RSS spread to @rss_spread and push a matching default
 * indirection table.
 *
 * Nothing is done if the spread is unchanged.  Without a filter table only
 * the new spread is recorded.  Otherwise the current default RSS context is
 * freed and, for a spread of more than one queue, the default indirection
 * table is regenerated and pushed through the NIC type's
 * rx_push_rss_config() method.
 *
 * Returns 0, or the result of rx_push_rss_config() when it was called.
 */
int efx_mcdi_push_default_indir_table(struct efx_nic *efx,
                                      unsigned int rss_spread)
{
        if (efx->rss_spread == rss_spread)
                return 0;

        efx->rss_spread = rss_spread;
        if (!efx->filter_state)
                return 0;

        efx_mcdi_rx_free_indir_table(efx);
        if (rss_spread <= 1)
                return 0;

        efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table);
        return efx->type->rx_push_rss_config(efx, false,
                                             efx->rss_context.rx_indir_table,
                                             NULL);
}