root/net/netfilter/nf_tables_trace.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 2015 Red Hat GmbH
 * Author: Florian Westphal <fw@strlen.de>
 */

#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
#include <linux/siphash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>

/*
 * Upper bounds, in bytes, on how much of each packet header is copied into
 * a trace message.  They are used both to cap the copied length and to size
 * the netlink message.  NFT_TRACETYPE_LL_HSIZE must be large enough to hold
 * a struct vlan_ethhdr; nf_trace_fill_ll_header() enforces this at build time.
 */
#define NFT_TRACETYPE_LL_HSIZE          20
#define NFT_TRACETYPE_NETWORK_HSIZE     40
#define NFT_TRACETYPE_TRANSPORT_HSIZE   20

/*
 * Static branch key, initially false, exported to GPL modules.
 * NOTE(review): presumably flipped when tracing is requested so the fast
 * path can skip trace work; the users are outside this file, confirm there.
 */
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);

/*
 * Add attribute @type to @nlskb, filled with @len bytes copied from @skb
 * starting at offset @off.
 *
 * A zero @len emits nothing and counts as success.  Returns 0 on success,
 * -1 if the attribute could not be reserved or the packet bytes could not
 * be copied.
 */
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
                             const struct sk_buff *skb,
                             int off, unsigned int len)
{
        struct nlattr *attr;

        if (!len)
                return 0;

        attr = nla_reserve(nlskb, type, len);
        if (!attr)
                return -1;

        /* Copy straight into the reserved attribute payload. */
        if (skb_copy_bits(skb, off, nla_data(attr), len) != 0)
                return -1;

        return 0;
}

static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
                                   const struct sk_buff *skb)
{
        struct vlan_ethhdr veth;
        int off;

        BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);

        off = skb_mac_header(skb) - skb->data;
        if (off != -ETH_HLEN)
                return -1;

        if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
                return -1;

        veth.h_vlan_proto = skb->vlan_proto;
        veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
        veth.h_vlan_encapsulated_proto = skb->protocol;

        return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
}

/*
 * Emit interface index and interface type attributes for the input and
 * output devices.  Either device may be NULL, in which case its attributes
 * are skipped.
 *
 * Returns 0 on success, -1 as soon as one attribute cannot be added.
 */
static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
                                  const struct net_device *indev,
                                  const struct net_device *outdev)
{
        /* Short-circuit evaluation keeps the index-then-type ordering. */
        if (indev &&
            (nla_put_be32(nlskb, NFTA_TRACE_IIF, htonl(indev->ifindex)) ||
             nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, htons(indev->type))))
                return -1;

        if (outdev &&
            (nla_put_be32(nlskb, NFTA_TRACE_OIF, htonl(outdev->ifindex)) ||
             nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, htons(outdev->type))))
                return -1;

        return 0;
}

/*
 * Emit conntrack attributes for @skb: the state, and when a conntrack entry
 * is attached also the direction, id and (if non-zero) status bits.
 *
 * Nothing is emitted when no conntrack hook is registered, or when the
 * packet has no entry and is not marked untracked.
 *
 * Returns 0 on success (including the "nothing to emit" cases), -1 if an
 * attribute cannot be added.
 */
static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
                                 const struct sk_buff *skb)
{
        const struct nf_ct_hook *hook;
        enum ip_conntrack_info ctinfo;
        const struct nf_conn *ct;
        u32 state, id, status;
        u8 dir;

        hook = rcu_dereference(nf_ct_hook);
        if (!hook)
                return 0;

        ct = nf_ct_get(skb, &ctinfo);

        /* not seen by conntrack or invalid */
        if (!ct && ctinfo != IP_CT_UNTRACKED)
                return 0;

        state = ct ? NF_CT_STATE_BIT(ctinfo) : NF_CT_STATE_UNTRACKED_BIT;
        if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATE, htonl(state)))
                return -1;

        /* Untracked packets carry a state only. */
        if (!ct)
                return 0;

        id = hook->get_id(&ct->ct_general);
        status = READ_ONCE(ct->status);
        dir = CTINFO2DIR(ctinfo);

        if (nla_put_u8(nlskb, NFTA_TRACE_CT_DIRECTION, dir))
                return -1;

        if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
                return -1;

        /* Kernel implementation detail, withhold this from userspace for now */
        status &= ~IPS_NAT_CLASH;
        if (!status)
                return 0;

        if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
                return -1;

        return 0;
}

static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
                                  const struct nft_pktinfo *pkt)
{
        const struct sk_buff *skb = pkt->skb;
        int off = skb_network_offset(skb);
        unsigned int len, nh_end;

        nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
        len = min_t(unsigned int, nh_end - skb_network_offset(skb),
                    NFT_TRACETYPE_NETWORK_HSIZE);
        if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
                return -1;

        if (pkt->flags & NFT_PKTINFO_L4PROTO) {
                len = min_t(unsigned int, skb->len - nft_thoff(pkt),
                            NFT_TRACETYPE_TRANSPORT_HSIZE);
                if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
                                      nft_thoff(pkt), len))
                        return -1;
        }

        if (!skb_mac_header_was_set(skb))
                return 0;

        if (skb_vlan_tag_get(skb))
                return nf_trace_fill_ll_header(nlskb, skb);

        off = skb_mac_header(skb) - skb->data;
        len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
        return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
                                 skb, off, len);
}

static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
                                   const struct nft_verdict *verdict,
                                   const struct nft_rule_dp *rule,
                                   const struct nft_traceinfo *info)
{
        if (!rule || rule->is_last)
                return 0;

        /* a continue verdict with ->type == RETURN means that this is
         * an implicit return (end of chain reached).
         *
         * Since no rule matched, the ->rule pointer is invalid.
         */
        if (info->type == NFT_TRACETYPE_RETURN &&
            verdict->code == NFT_CONTINUE)
                return 0;

        return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
                            cpu_to_be64(rule->handle),
                            NFTA_TRACE_PAD);
}

/*
 * Tell whether the trace event carries a verdict that names a target chain:
 * the trace type must be RETURN or RULE and the verdict code must be a jump
 * or a goto.
 */
static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict,
                                         struct nft_traceinfo *info)
{
        if (info->type != NFT_TRACETYPE_RETURN &&
            info->type != NFT_TRACETYPE_RULE)
                return false;

        return verdict->code == NFT_JUMP || verdict->code == NFT_GOTO;
}

/*
 * Find the chain that @rule belongs to by walking forward to the
 * end-of-chain marker, which records the owning chain.
 *
 * Falls back to the base chain from @info when @rule is NULL, or (with a
 * one-time warning) when the marker has no chain recorded.
 */
static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule,
                                                   const struct nft_traceinfo *info)
{
        const struct nft_rule_dp_last *tail;
        const struct nft_rule_dp *cur;

        if (!rule)
                return &info->basechain->chain;

        /* Skip ahead to the trailing marker entry. */
        for (cur = rule; !cur->is_last; cur = nft_rule_next(cur))
                ;

        tail = (const struct nft_rule_dp_last *)cur;
        if (WARN_ON_ONCE(!tail->chain))
                return &info->basechain->chain;

        return tail->chain;
}

void nft_trace_notify(const struct nft_pktinfo *pkt,
                      const struct nft_verdict *verdict,
                      const struct nft_rule_dp *rule,
                      struct nft_traceinfo *info)
{
        const struct nft_chain *chain;
        struct nlmsghdr *nlh;
        struct sk_buff *skb;
        unsigned int size;
        u32 mark = 0;
        u16 event;

        if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
                return;

        chain = nft_trace_get_chain(rule, info);

        size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
                nla_total_size(strlen(chain->table->name)) +
                nla_total_size(strlen(chain->name)) +
                nla_total_size_64bit(sizeof(__be64)) +  /* rule handle */
                nla_total_size(sizeof(__be32)) +        /* trace type */
                nla_total_size(0) +                     /* VERDICT, nested */
                        nla_total_size(sizeof(u32)) +   /* verdict code */
                nla_total_size(sizeof(u32)) +           /* ct id */
                nla_total_size(sizeof(u8)) +            /* ct direction */
                nla_total_size(sizeof(u32)) +           /* ct state */
                nla_total_size(sizeof(u32)) +           /* ct status */
                nla_total_size(sizeof(u32)) +           /* trace id */
                nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
                nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
                nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
                nla_total_size(sizeof(u32)) +           /* iif */
                nla_total_size(sizeof(__be16)) +        /* iiftype */
                nla_total_size(sizeof(u32)) +           /* oif */
                nla_total_size(sizeof(__be16)) +        /* oiftype */
                nla_total_size(sizeof(u32)) +           /* mark */
                nla_total_size(sizeof(u32)) +           /* nfproto */
                nla_total_size(sizeof(u32));            /* policy */

        if (nft_trace_have_verdict_chain(verdict, info))
                size += nla_total_size(strlen(verdict->chain->name)); /* jump target */

        skb = nlmsg_new(size, GFP_ATOMIC);
        if (!skb)
                return;

        event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
        nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
                           NFNETLINK_V0, 0);
        if (!nlh)
                goto nla_put_failure;

        if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
                goto nla_put_failure;

        if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
                goto nla_put_failure;

        if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
                goto nla_put_failure;

        if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name))
                goto nla_put_failure;

        if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name))
                goto nla_put_failure;

        if (nf_trace_fill_rule_info(skb, verdict, rule, info))
                goto nla_put_failure;

        switch (info->type) {
        case NFT_TRACETYPE_UNSPEC:
        case __NFT_TRACETYPE_MAX:
                break;
        case NFT_TRACETYPE_RETURN:
        case NFT_TRACETYPE_RULE: {
                unsigned int v;

                if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
                        goto nla_put_failure;

                /* pkt->skb undefined iff NF_STOLEN, disable dump */
                v = verdict->code & NF_VERDICT_MASK;
                if (v == NF_STOLEN)
                        info->packet_dumped = true;
                else
                        mark = pkt->skb->mark;

                break;
        }
        case NFT_TRACETYPE_POLICY:
                mark = pkt->skb->mark;

                if (nla_put_be32(skb, NFTA_TRACE_POLICY,
                                 htonl(info->basechain->policy)))
                        goto nla_put_failure;
                break;
        }

        if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
                goto nla_put_failure;

        if (!info->packet_dumped) {
                if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
                        goto nla_put_failure;

                if (nf_trace_fill_pkt_info(skb, pkt))
                        goto nla_put_failure;

                if (nf_trace_fill_ct_info(skb, pkt->skb))
                        goto nla_put_failure;

                info->packet_dumped = true;
        }

        nlmsg_end(skb, nlh);
        nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
        return;

 nla_put_failure:
        WARN_ON_ONCE(1);
        kfree_skb(skb);
}

void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
                    const struct nft_chain *chain)
{
        static siphash_key_t trace_key __read_mostly;
        struct sk_buff *skb = pkt->skb;

        info->basechain = nft_base_chain(chain);
        info->trace = true;
        info->nf_trace = pkt->skb->nf_trace;
        info->packet_dumped = false;

        net_get_random_once(&trace_key, sizeof(trace_key));

        info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
                                        skb_get_hash_net(nft_net(pkt), skb),
                                        skb->skb_iif,
                                        &trace_key);
}