root/net/netfilter/nf_tables_offload.c
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_offload.h>
#include <net/pkt_cls.h>

static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
{
        struct nft_flow_rule *flow;

        flow = kzalloc_obj(struct nft_flow_rule);
        if (!flow)
                return NULL;

        flow->rule = flow_rule_alloc(num_actions);
        if (!flow->rule) {
                kfree(flow);
                return NULL;
        }

        flow->rule->match.dissector     = &flow->match.dissector;
        flow->rule->match.mask          = &flow->match.mask;
        flow->rule->match.key           = &flow->match.key;

        return flow;
}

void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
                                 enum flow_dissector_key_id addr_type)
{
        struct nft_flow_match *match = &flow->match;
        struct nft_flow_key *mask = &match->mask;
        struct nft_flow_key *key = &match->key;

        if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))
                return;

        key->control.addr_type = addr_type;
        mask->control.addr_type = 0xffff;
        match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL);
        match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
                offsetof(struct nft_flow_key, control);
}

struct nft_offload_ethertype {
        __be16 value;
        __be16 mask;
};

static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
                                        struct nft_flow_rule *flow)
{
        struct nft_flow_match *match = &flow->match;
        struct nft_offload_ethertype ethertype = {
                .value  = match->key.basic.n_proto,
                .mask   = match->mask.basic.n_proto,
        };

        if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) &&
            (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
             match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
                match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
                match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid;
                match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid;
                match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid;
                match->key.vlan.vlan_tpid = ethertype.value;
                match->mask.vlan.vlan_tpid = ethertype.mask;
                match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
                        offsetof(struct nft_flow_key, cvlan);
                match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN);
        } else if (match->dissector.used_keys &
                   BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) &&
                   (match->key.basic.n_proto == htons(ETH_P_8021Q) ||
                    match->key.basic.n_proto == htons(ETH_P_8021AD))) {
                match->key.basic.n_proto = match->key.vlan.vlan_tpid;
                match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
                match->key.vlan.vlan_tpid = ethertype.value;
                match->mask.vlan.vlan_tpid = ethertype.mask;
                match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
                        offsetof(struct nft_flow_key, vlan);
                match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
        }
}

struct nft_flow_rule *nft_flow_rule_create(struct net *net,
                                           const struct nft_rule *rule)
{
        struct nft_offload_ctx *ctx;
        struct nft_flow_rule *flow;
        int num_actions = 0, err;
        struct nft_expr *expr;

        expr = nft_expr_first(rule);
        while (nft_expr_more(rule, expr)) {
                if (expr->ops->offload_action &&
                    expr->ops->offload_action(expr))
                        num_actions++;

                expr = nft_expr_next(expr);
        }

        if (num_actions == 0)
                return ERR_PTR(-EOPNOTSUPP);

        flow = nft_flow_rule_alloc(num_actions);
        if (!flow)
                return ERR_PTR(-ENOMEM);

        expr = nft_expr_first(rule);

        ctx = kzalloc_obj(struct nft_offload_ctx);
        if (!ctx) {
                err = -ENOMEM;
                goto err_out;
        }
        ctx->net = net;
        ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;

        while (nft_expr_more(rule, expr)) {
                if (!expr->ops->offload) {
                        err = -EOPNOTSUPP;
                        goto err_out;
                }
                err = expr->ops->offload(ctx, flow, expr);
                if (err < 0)
                        goto err_out;

                expr = nft_expr_next(expr);
        }
        nft_flow_rule_transfer_vlan(ctx, flow);

        flow->proto = ctx->dep.l3num;
        kfree(ctx);

        return flow;
err_out:
        kfree(ctx);
        nft_flow_rule_destroy(flow);

        return ERR_PTR(err);
}

void nft_flow_rule_destroy(struct nft_flow_rule *flow)
{
        struct flow_action_entry *entry;
        int i;

        flow_action_for_each(i, entry, &flow->rule->action) {
                switch (entry->id) {
                case FLOW_ACTION_REDIRECT:
                case FLOW_ACTION_MIRRED:
                        dev_put(entry->dev);
                        break;
                default:
                        break;
                }
        }
        kfree(flow->rule);
        kfree(flow);
}

void nft_offload_set_dependency(struct nft_offload_ctx *ctx,
                                enum nft_offload_dep_type type)
{
        ctx->dep.type = type;
}

void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
                                   const void *data, u32 len)
{
        switch (ctx->dep.type) {
        case NFT_OFFLOAD_DEP_NETWORK:
                WARN_ON(len != sizeof(__u16));
                memcpy(&ctx->dep.l3num, data, sizeof(__u16));
                break;
        case NFT_OFFLOAD_DEP_TRANSPORT:
                WARN_ON(len != sizeof(__u8));
                memcpy(&ctx->dep.protonum, data, sizeof(__u8));
                break;
        default:
                break;
        }
        ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
}

static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
                                         __be16 proto, int priority,
                                         struct netlink_ext_ack *extack)
{
        common->protocol = proto;
        common->prio = priority;
        common->extack = extack;
}

static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
                             struct list_head *cb_list)
{
        struct flow_block_cb *block_cb;
        int err;

        list_for_each_entry(block_cb, cb_list, list) {
                err = block_cb->cb(type, type_data, block_cb->cb_priv);
                if (err < 0)
                        return err;
        }
        return 0;
}

static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
        if (basechain->ops.priority <= 0 ||
            basechain->ops.priority > USHRT_MAX)
                return -1;

        return 0;
}

bool nft_chain_offload_support(const struct nft_base_chain *basechain)
{
        struct nf_hook_ops *ops;
        struct net_device *dev;
        struct nft_hook *hook;

        if (nft_chain_offload_priority(basechain) < 0)
                return false;

        list_for_each_entry(hook, &basechain->hook_list, list) {
                list_for_each_entry(ops, &hook->ops_list, list) {
                        if (ops->pf != NFPROTO_NETDEV ||
                            ops->hooknum != NF_NETDEV_INGRESS)
                                return false;

                        dev = ops->dev;
                        if (!dev->netdev_ops->ndo_setup_tc &&
                            !flow_indr_dev_exists())
                                return false;
                }
        }

        return true;
}

static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
                                       const struct nft_base_chain *basechain,
                                       const struct nft_rule *rule,
                                       const struct nft_flow_rule *flow,
                                       struct netlink_ext_ack *extack,
                                       enum flow_cls_command command)
{
        __be16 proto = ETH_P_ALL;

        memset(cls_flow, 0, sizeof(*cls_flow));

        if (flow)
                proto = flow->proto;

        nft_flow_offload_common_init(&cls_flow->common, proto,
                                     basechain->ops.priority, extack);
        cls_flow->command = command;
        cls_flow->cookie = (unsigned long) rule;
        if (flow)
                cls_flow->rule = flow->rule;
}

static int nft_flow_offload_cmd(const struct nft_chain *chain,
                                const struct nft_rule *rule,
                                struct nft_flow_rule *flow,
                                enum flow_cls_command command,
                                struct flow_cls_offload *cls_flow)
{
        struct netlink_ext_ack extack = {};
        struct nft_base_chain *basechain;

        if (!nft_is_base_chain(chain))
                return -EOPNOTSUPP;

        basechain = nft_base_chain(chain);
        nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack,
                                   command);

        return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow,
                                 &basechain->flow_block.cb_list);
}

static int nft_flow_offload_rule(const struct nft_chain *chain,
                                 struct nft_rule *rule,
                                 struct nft_flow_rule *flow,
                                 enum flow_cls_command command)
{
        struct flow_cls_offload cls_flow;

        return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow);
}

int nft_flow_rule_stats(const struct nft_chain *chain,
                        const struct nft_rule *rule)
{
        struct flow_cls_offload cls_flow = {};
        struct nft_expr *expr, *next;
        int err;

        err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS,
                                   &cls_flow);
        if (err < 0)
                return err;

        nft_rule_for_each_expr(expr, next, rule) {
                if (expr->ops->offload_stats)
                        expr->ops->offload_stats(expr, &cls_flow.stats);
        }

        return 0;
}

static int nft_flow_offload_bind(struct flow_block_offload *bo,
                                 struct nft_base_chain *basechain)
{
        list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
        return 0;
}

static int nft_flow_offload_unbind(struct flow_block_offload *bo,
                                   struct nft_base_chain *basechain)
{
        struct flow_block_cb *block_cb, *next;
        struct flow_cls_offload cls_flow;
        struct netlink_ext_ack extack;
        struct nft_chain *chain;
        struct nft_rule *rule;

        chain = &basechain->chain;
        list_for_each_entry(rule, &chain->rules, list) {
                memset(&extack, 0, sizeof(extack));
                nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
                                           &extack, FLOW_CLS_DESTROY);
                nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
        }

        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
        }

        return 0;
}

static int nft_block_setup(struct nft_base_chain *basechain,
                           struct flow_block_offload *bo,
                           enum flow_block_command cmd)
{
        int err;

        switch (cmd) {
        case FLOW_BLOCK_BIND:
                err = nft_flow_offload_bind(bo, basechain);
                break;
        case FLOW_BLOCK_UNBIND:
                err = nft_flow_offload_unbind(bo, basechain);
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

static void nft_flow_block_offload_init(struct flow_block_offload *bo,
                                        struct net *net,
                                        enum flow_block_command cmd,
                                        struct nft_base_chain *basechain,
                                        struct netlink_ext_ack *extack)
{
        memset(bo, 0, sizeof(*bo));
        bo->net         = net;
        bo->block       = &basechain->flow_block;
        bo->command     = cmd;
        bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        bo->extack      = extack;
        bo->cb_list_head = &basechain->flow_block.cb_list;
        INIT_LIST_HEAD(&bo->cb_list);
}

static int nft_block_offload_cmd(struct nft_base_chain *chain,
                                 struct net_device *dev,
                                 enum flow_block_command cmd)
{
        struct netlink_ext_ack extack = {};
        struct flow_block_offload bo;
        int err;

        nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
        if (err < 0)
                return err;

        return nft_block_setup(chain, &bo, cmd);
}

static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
{
        struct nft_base_chain *basechain = block_cb->indr.data;
        struct net_device *dev = block_cb->indr.dev;
        struct netlink_ext_ack extack = {};
        struct nftables_pernet *nft_net;
        struct net *net = dev_net(dev);
        struct flow_block_offload bo;

        nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
                                    basechain, &extack);
        nft_net = nft_pernet(net);
        mutex_lock(&nft_net->commit_mutex);
        list_del(&block_cb->driver_list);
        list_move(&block_cb->list, &bo.cb_list);
        nft_flow_offload_unbind(&bo, basechain);
        mutex_unlock(&nft_net->commit_mutex);
}

static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain,
                                      struct net_device *dev,
                                      enum flow_block_command cmd)
{
        struct netlink_ext_ack extack = {};
        struct flow_block_offload bo;
        int err;

        nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack);

        err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo,
                                          nft_indr_block_cleanup);
        if (err < 0)
                return err;

        if (list_empty(&bo.cb_list))
                return -EOPNOTSUPP;

        return nft_block_setup(basechain, &bo, cmd);
}

static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
                                 struct net_device *dev,
                                 enum flow_block_command cmd)
{
        int err;

        if (dev->netdev_ops->ndo_setup_tc)
                err = nft_block_offload_cmd(basechain, dev, cmd);
        else
                err = nft_indr_block_offload_cmd(basechain, dev, cmd);

        return err;
}

static int nft_flow_block_chain(struct nft_base_chain *basechain,
                                const struct net_device *this_dev,
                                enum flow_block_command cmd)
{
        struct nf_hook_ops *ops;
        struct nft_hook *hook;
        int err, i = 0;

        list_for_each_entry(hook, &basechain->hook_list, list) {
                list_for_each_entry(ops, &hook->ops_list, list) {
                        if (this_dev && this_dev != ops->dev)
                                continue;

                        err = nft_chain_offload_cmd(basechain, ops->dev, cmd);
                        if (err < 0 && cmd == FLOW_BLOCK_BIND) {
                                if (!this_dev)
                                        goto err_flow_block;

                                return err;
                        }
                        i++;
                }
        }

        return 0;

err_flow_block:
        list_for_each_entry(hook, &basechain->hook_list, list) {
                list_for_each_entry(ops, &hook->ops_list, list) {
                        if (i-- <= 0)
                                break;

                        nft_chain_offload_cmd(basechain, ops->dev,
                                              FLOW_BLOCK_UNBIND);
                }
        }
        return err;
}

static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
                                  enum flow_block_command cmd)
{
        struct nft_base_chain *basechain;
        u8 policy;

        if (!nft_is_base_chain(chain))
                return -EOPNOTSUPP;

        basechain = nft_base_chain(chain);
        policy = ppolicy ? *ppolicy : basechain->policy;

        /* Only default policy to accept is supported for now. */
        if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
                return -EOPNOTSUPP;

        return nft_flow_block_chain(basechain, NULL, cmd);
}

static void nft_flow_rule_offload_abort(struct net *net,
                                        struct nft_trans *trans)
{
        struct nftables_pernet *nft_net = nft_pernet(net);
        int err = 0;

        list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
                if (trans->table->family != NFPROTO_NETDEV)
                        continue;

                switch (trans->msg_type) {
                case NFT_MSG_NEWCHAIN:
                        if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
                            nft_trans_chain_update(trans))
                                continue;

                        err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
                                                     FLOW_BLOCK_UNBIND);
                        break;
                case NFT_MSG_DELCHAIN:
                        if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
                                                     FLOW_BLOCK_BIND);
                        break;
                case NFT_MSG_NEWRULE:
                        if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
                                                    nft_trans_rule(trans),
                                                    NULL, FLOW_CLS_DESTROY);
                        break;
                case NFT_MSG_DELRULE:
                        if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
                                                    nft_trans_rule(trans),
                                                    nft_trans_flow_rule(trans),
                                                    FLOW_CLS_REPLACE);
                        break;
                }

                if (WARN_ON_ONCE(err))
                        break;
        }
}

int nft_flow_rule_offload_commit(struct net *net)
{
        struct nftables_pernet *nft_net = nft_pernet(net);
        struct nft_trans *trans;
        int err = 0;
        u8 policy;

        list_for_each_entry(trans, &nft_net->commit_list, list) {
                if (trans->table->family != NFPROTO_NETDEV)
                        continue;

                switch (trans->msg_type) {
                case NFT_MSG_NEWCHAIN:
                        if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
                            nft_trans_chain_update(trans))
                                continue;

                        policy = nft_trans_chain_policy(trans);
                        err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
                                                     FLOW_BLOCK_BIND);
                        break;
                case NFT_MSG_DELCHAIN:
                        if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        policy = nft_trans_chain_policy(trans);
                        err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
                                                     FLOW_BLOCK_UNBIND);
                        break;
                case NFT_MSG_NEWRULE:
                        if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        if (trans->flags & NLM_F_REPLACE ||
                            !(trans->flags & NLM_F_APPEND)) {
                                err = -EOPNOTSUPP;
                                break;
                        }
                        err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
                                                    nft_trans_rule(trans),
                                                    nft_trans_flow_rule(trans),
                                                    FLOW_CLS_REPLACE);
                        break;
                case NFT_MSG_DELRULE:
                        if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
                                                    nft_trans_rule(trans),
                                                    NULL, FLOW_CLS_DESTROY);
                        break;
                }

                if (err) {
                        nft_flow_rule_offload_abort(net, trans);
                        break;
                }
        }

        return err;
}

static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net,
                                                 struct net_device *dev)
{
        struct nft_base_chain *basechain;
        struct nft_hook *hook, *found;
        const struct nft_table *table;
        struct nft_chain *chain;

        list_for_each_entry(table, &nft_net->tables, list) {
                if (table->family != NFPROTO_NETDEV)
                        continue;

                list_for_each_entry(chain, &table->chains, list) {
                        if (!nft_is_base_chain(chain) ||
                            !(chain->flags & NFT_CHAIN_HW_OFFLOAD))
                                continue;

                        found = NULL;
                        basechain = nft_base_chain(chain);
                        list_for_each_entry(hook, &basechain->hook_list, list) {
                                if (!nft_hook_find_ops(hook, dev))
                                        continue;

                                found = hook;
                                break;
                        }
                        if (!found)
                                continue;

                        return chain;
                }
        }

        return NULL;
}

static int nft_offload_netdev_event(struct notifier_block *this,
                                    unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct nftables_pernet *nft_net;
        struct net *net = dev_net(dev);
        struct nft_chain *chain;

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;

        nft_net = nft_pernet(net);
        mutex_lock(&nft_net->commit_mutex);
        chain = __nft_offload_get_chain(nft_net, dev);
        if (chain)
                nft_flow_block_chain(nft_base_chain(chain), dev,
                                     FLOW_BLOCK_UNBIND);

        mutex_unlock(&nft_net->commit_mutex);

        return NOTIFY_DONE;
}

static struct notifier_block nft_offload_netdev_notifier = {
        .notifier_call  = nft_offload_netdev_event,
};

int nft_offload_init(void)
{
        return register_netdevice_notifier(&nft_offload_netdev_notifier);
}

void nft_offload_exit(void)
{
        unregister_netdevice_notifier(&nft_offload_netdev_notifier);
}