root/tools/testing/selftests/bpf/progs/bpf_flow.c
// SPDX-License-Identifier: GPL-2.0
#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

/* PROG(F) opens the definition of one per-protocol program in the
 * "flow_dissector" section. F is macro-expanded before it reaches PROG_(), so
 * passing one of the program-index macros defined in this file (IP, IPV6, ...)
 * names the function flow_dissector_<index>, e.g. PROG(IP) becomes
 * flow_dissector_0. The NAME argument of PROG_() is accepted but not used.
 */
#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM

/* IPv4 source address (host byte order) for which _dissect() returns
 * BPF_FLOW_DISSECTOR_CONTINUE instead of dissecting the packet itself; only
 * headers with ihl == 5 are matched.
 */
#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */

/* These are the identifiers of the BPF programs that will be used in tail
 * calls. Name is limited to 16 characters, with the terminating character and
 * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
 *
 * Each value is the jmp_table slot passed to bpf_tail_call_static() for that
 * protocol, and PROG() pastes the same value into the program's function
 * name, which is why these must stay preprocessor macros. MAX_PROG is the
 * number of slots and is used as jmp_table's max_entries.
 *
 * NOTE(review): no "bpf_func_" prefix is visible in this file; PROG_() emits
 * flow_dissector_<index>. Confirm whether the naming limit described above
 * still applies.
 */
#define IP              0
#define IPV6            1
#define IPV6OP          2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR          3 /* Fragmentation IPv6 Extension Header */
#define MPLS            4
#define VLAN            5
#define MAX_PROG        6

/* Masks for the frag_off fields, written in host byte order; users wrap them
 * in bpf_htons() before testing the value read from the packet.
 *
 * IPv4 (iphdr.frag_off): any bit of IP_MF | IP_OFFSET set marks the packet as
 * a fragment; any bit of IP_OFFSET set marks it as a non-first fragment.
 * IPv6 (frag_hdr.frag_off): any bit of IP6_OFFSET set marks a non-first
 * fragment. IP6_MF is not referenced in this file.
 */
#define IP_MF           0x2000
#define IP_OFFSET       0x1FFF
#define IP6_MF          0x0001
#define IP6_OFFSET      0xFFF8

/* VLAN tag as read at keys->thoff by the VLAN program. Both fields are
 * 16-bit big-endian values.
 */
struct vlan_hdr {
        /* Not read anywhere in this file */
        __be16 h_vlan_TCI;
        /* EtherType of what follows the tag; compared against
         * ETH_P_8021Q / ETH_P_8021AD and passed to parse_eth_proto()
         */
        __be16 h_vlan_encapsulated_proto;
};

/* Fixed leading four bytes of a GRE header, both fields big-endian. Fields
 * that may follow (checksum, key, sequence number) are not modelled here;
 * parse_ip_proto() steps over them by offset according to the flags.
 */
struct gre_hdr {
        /* Tested with GRE_VERSION and GRE_IS_CSUM/KEY/SEQ */
        __be16 flags;
        /* Encapsulated protocol; compared with ETH_P_TEB or passed to
         * parse_eth_proto()
         */
        __be16 proto;
};

/* IPv6 fragment extension header as read by the IPV6FR program. */
struct frag_hdr {
        /* Protocol of the header that follows; stored in keys->ip_proto */
        __u8 nexthdr;
        /* Not read in this file */
        __u8 reserved;
        /* Big-endian; masked with bpf_htons(IP6_OFFSET) to tell a first
         * fragment from later ones
         */
        __be16 frag_off;
        /* Not read in this file */
        __be32 identification;
};

/* Program array with MAX_PROG slots, indexed by the program-index macros
 * (IP ... VLAN). parse_eth_proto() and parse_ipv6_proto() tail-call into it
 * with bpf_tail_call_static(). Nothing in this file fills the slots —
 * presumably the loader does; verify against the user-space side.
 */
struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
        __uint(max_entries, MAX_PROG);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

/* Hash map (up to 1024 entries) into which export_flow_keys() writes a copy
 * of the flow keys on every call. The __u32 key is built there as
 * (sport << 16) | dport from the raw key fields.
 */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 1024);
        __type(key, __u32);
        __type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");

static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
                                            int ret)
{
        __u32 key = (__u32)(keys->sport) << 16 | keys->dport;
        struct bpf_flow_keys val;

        memcpy(&val, keys, sizeof(val));
        bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
        return ret;
}

/* Mask selecting the low 20 bits of the first 32-bit word of an IPv6 header,
 * expressed in network byte order.
 */
#define IPV6_FLOWLABEL_MASK             __bpf_constant_htonl(0x000FFFFF)

/* Returns the flow-label bits of hdr's first 32-bit word. The value is left
 * in network byte order.
 */
static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
{
        const __be32 *first_word = (const __be32 *)hdr;

        return *first_word & IPV6_FLOWLABEL_MASK;
}

/* Returns a pointer to hdr_size bytes of packet data located at offset
 * skb->flow_keys->thoff.
 *
 * If those bytes end at or before skb->data_end, the returned pointer refers
 * directly to the packet. Otherwise the bytes are copied into buffer with
 * bpf_skb_load_bytes() and buffer is returned, so buffer must have room for
 * hdr_size bytes.
 *
 * Returns NULL when thoff + hdr_size would exceed USHRT_MAX or when the copy
 * fails.
 */
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
                                                         __u16 hdr_size,
                                                         void *buffer)
{
        void *pkt_end = (void *)(long)skb->data_end;
        void *pkt = (void *)(long)skb->data;
        __u16 offset = skb->flow_keys->thoff;
        __u8 *direct;

        /* Verifies this variable offset does not overflow */
        if (offset > (USHRT_MAX - hdr_size))
                return NULL;

        direct = pkt + offset;
        if (direct + hdr_size > pkt_end) {
                /* Not reachable through the direct pointer: copy it out */
                if (bpf_skb_load_bytes(skb, offset, buffer, hdr_size))
                        return NULL;

                return buffer;
        }

        return direct;
}

/* Dispatches on ETHERTYPE.
 *
 * proto is compared in network byte order. For a supported EtherType the
 * matching jmp_table slot is tail-called. If execution continues past that
 * call, or the EtherType is not supported, the keys are exported and BPF_DROP
 * is returned.
 */
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
        struct bpf_flow_keys *keys = skb->flow_keys;

        if (proto == bpf_htons(ETH_P_IP)) {
                bpf_tail_call_static(skb, &jmp_table, IP);
        } else if (proto == bpf_htons(ETH_P_IPV6)) {
                bpf_tail_call_static(skb, &jmp_table, IPV6);
        } else if (proto == bpf_htons(ETH_P_MPLS_MC) ||
                   proto == bpf_htons(ETH_P_MPLS_UC)) {
                bpf_tail_call_static(skb, &jmp_table, MPLS);
        } else if (proto == bpf_htons(ETH_P_8021Q) ||
                   proto == bpf_htons(ETH_P_8021AD)) {
                bpf_tail_call_static(skb, &jmp_table, VLAN);
        }

        /* Protocol not supported, or control came back from the tail call */
        return export_flow_keys(keys, BPF_DROP);
}

/* Entry program of the dissector.
 *
 * IPv4 packets whose header has no options (ihl == 5) and whose source
 * address is FLOW_CONTINUE_SADDR get BPF_FLOW_DISSECTOR_CONTINUE. Everything
 * else is dispatched on keys->n_proto via parse_eth_proto().
 */
SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct iphdr *iph, _iph;
        bool hand_back = false;

        if (keys->n_proto == bpf_htons(ETH_P_IP)) {
                /* IP traffic from FLOW_CONTINUE_SADDR falls-back to
                 * standard dissector
                 */
                iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
                if (iph)
                        hand_back = iph->ihl == 5 &&
                                    iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR);
        }

        if (hand_back)
                return BPF_FLOW_DISSECTOR_CONTINUE;

        return parse_eth_proto(skb, keys->n_proto);
}

/* Parses on IPPROTO_*.
 *
 * Handles the layer found at keys->thoff for the given IP protocol number:
 *  - ICMP: only checks that a full header is readable.
 *  - IPIP / IPV6: marks the flow as encapsulated and, unless
 *    BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP is set, dispatches the inner packet.
 *  - GRE: version 0 only; steps thoff over the header and its optional
 *    fields, then dispatches the inner protocol, stepping over an Ethernet
 *    header first for ETH_P_TEB.
 *  - TCP / UDP / UDPLITE: stores source and destination port.
 * Any other protocol, or an unreadable header, yields BPF_DROP. Every return
 * path goes through export_flow_keys(), directly or via parse_eth_proto().
 */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        void *data_end = (void *)(long)skb->data_end;
        struct icmphdr *icmp, _icmp;
        struct gre_hdr *gre, _gre;
        struct ethhdr *eth, _eth;
        struct tcphdr *tcp, _tcp;
        struct udphdr *udp, _udp;
        __u16 gre_len;

        switch (proto) {
        case IPPROTO_ICMP:
                icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
                return export_flow_keys(keys, icmp ? BPF_OK : BPF_DROP);

        case IPPROTO_IPIP:
                keys->is_encap = true;
                if (!(keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP))
                        return parse_eth_proto(skb, bpf_htons(ETH_P_IP));

                return export_flow_keys(keys, BPF_OK);

        case IPPROTO_IPV6:
                keys->is_encap = true;
                if (!(keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP))
                        return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));

                return export_flow_keys(keys, BPF_OK);

        case IPPROTO_GRE:
                gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
                if (!gre)
                        return export_flow_keys(keys, BPF_DROP);

                /* Only inspect standard GRE packets with version 0 */
                if (bpf_htons(gre->flags & GRE_VERSION))
                        return export_flow_keys(keys, BPF_OK);

                gre_len = sizeof(*gre); /* GRE Flags and Proto */
                if (GRE_IS_CSUM(gre->flags))
                        gre_len += 4; /* chksum and Padding */
                if (GRE_IS_KEY(gre->flags))
                        gre_len += 4; /* key */
                if (GRE_IS_SEQ(gre->flags))
                        gre_len += 4; /* sequence number */
                keys->thoff += gre_len;

                keys->is_encap = true;
                if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
                        return export_flow_keys(keys, BPF_OK);

                if (gre->proto != bpf_htons(ETH_P_TEB))
                        return parse_eth_proto(skb, gre->proto);

                /* Bridged Ethernet: the inner EtherType sits in an Ethernet
                 * header that follows the GRE header.
                 */
                eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), &_eth);
                if (!eth)
                        return export_flow_keys(keys, BPF_DROP);

                keys->thoff += sizeof(*eth);

                return parse_eth_proto(skb, eth->h_proto);

        case IPPROTO_TCP:
                tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
                if (!tcp)
                        return export_flow_keys(keys, BPF_DROP);

                /* doff is in 32-bit words: it must be at least 5 and the
                 * header it describes must not run past data_end.
                 */
                if (tcp->doff < 5 ||
                    (__u8 *)tcp + (tcp->doff << 2) > data_end)
                        return export_flow_keys(keys, BPF_DROP);

                keys->sport = tcp->source;
                keys->dport = tcp->dest;
                return export_flow_keys(keys, BPF_OK);

        case IPPROTO_UDP:
        case IPPROTO_UDPLITE:
                udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
                if (!udp)
                        return export_flow_keys(keys, BPF_DROP);

                keys->sport = udp->source;
                keys->dport = udp->dest;
                return export_flow_keys(keys, BPF_OK);

        default:
                break;
        }

        /* Protocol not handled here */
        return export_flow_keys(keys, BPF_DROP);
}

/* Dispatches on an IPv6 next-header value.
 *
 * Hop-by-Hop/Destination options and Fragment headers are handed to their
 * own programs through jmp_table. Every other value is treated as an upper
 * layer protocol and parsed by parse_ip_proto(). If execution continues past
 * a tail call, the keys are exported and BPF_DROP is returned.
 */
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
        struct bpf_flow_keys *keys = skb->flow_keys;

        if (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS) {
                bpf_tail_call_static(skb, &jmp_table, IPV6OP);
        } else if (nexthdr == IPPROTO_FRAGMENT) {
                bpf_tail_call_static(skb, &jmp_table, IPV6FR);
        } else {
                return parse_ip_proto(skb, nexthdr);
        }

        return export_flow_keys(keys, BPF_DROP);
}

/* IPv4 program.
 *
 * Reads the IPv4 header at keys->thoff, records addresses and protocol, and
 * advances thoff by the header length. Fragments stop here with BPF_OK,
 * except a first fragment when BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG is set.
 * Otherwise the payload is parsed by parse_ip_proto(). An unreadable or too
 * short header, or a thoff past data_end, yields BPF_DROP.
 */
PROG(IP)(struct __sk_buff *skb)
{
        void *data_end = (void *)(long)skb->data_end;
        struct bpf_flow_keys *keys = skb->flow_keys;
        void *data = (void *)(long)skb->data;
        struct iphdr *iph, _iph;

        iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);

        /* IP header cannot be smaller than 20 bytes */
        if (!iph || iph->ihl < 5)
                return export_flow_keys(keys, BPF_DROP);

        keys->addr_proto = ETH_P_IP;
        keys->ipv4_src = iph->saddr;
        keys->ipv4_dst = iph->daddr;
        keys->ip_proto = iph->protocol;

        /* ihl is in 32-bit words */
        keys->thoff += iph->ihl * 4;
        if (data + keys->thoff > data_end)
                return export_flow_keys(keys, BPF_DROP);

        if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
                keys->is_frag = true;

                /* From second fragment on, packets do not have headers
                 * we can parse.
                 */
                if (iph->frag_off & bpf_htons(IP_OFFSET))
                        return export_flow_keys(keys, BPF_OK);

                keys->is_first_frag = true;

                /* No need to parse fragmented packet unless
                 * explicitly asked for.
                 */
                if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
                        return export_flow_keys(keys, BPF_OK);
        }

        return parse_ip_proto(skb, iph->protocol);
}

PROG(IPV6)(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct ipv6hdr *ip6h, _ip6h;

        ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
        if (!ip6h)
                return export_flow_keys(keys, BPF_DROP);

        keys->addr_proto = ETH_P_IPV6;
        memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));

        keys->thoff += sizeof(struct ipv6hdr);
        keys->ip_proto = ip6h->nexthdr;
        keys->flow_label = ip6_flowlabel(ip6h);

        if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
                return export_flow_keys(keys, BPF_OK);

        return parse_ipv6_proto(skb, ip6h->nexthdr);
}

PROG(IPV6OP)(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct ipv6_opt_hdr *ip6h, _ip6h;

        ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
        if (!ip6h)
                return export_flow_keys(keys, BPF_DROP);

        /* hlen is in 8-octets and does not include the first 8 bytes
         * of the header
         */
        keys->thoff += (1 + ip6h->hdrlen) << 3;
        keys->ip_proto = ip6h->nexthdr;

        return parse_ipv6_proto(skb, ip6h->nexthdr);
}

PROG(IPV6FR)(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct frag_hdr *fragh, _fragh;

        fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
        if (!fragh)
                return export_flow_keys(keys, BPF_DROP);

        keys->thoff += sizeof(*fragh);
        keys->is_frag = true;
        keys->ip_proto = fragh->nexthdr;

        if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
                keys->is_first_frag = true;

                /* No need to parse fragmented packet unless
                 * explicitly asked for.
                 */
                if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
                        return export_flow_keys(keys, BPF_OK);
        } else {
                return export_flow_keys(keys, BPF_OK);
        }

        return parse_ipv6_proto(skb, fragh->nexthdr);
}

PROG(MPLS)(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct mpls_label *mpls, _mpls;

        mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
        if (!mpls)
                return export_flow_keys(keys, BPF_DROP);

        return export_flow_keys(keys, BPF_OK);
}

PROG(VLAN)(struct __sk_buff *skb)
{
        struct bpf_flow_keys *keys = skb->flow_keys;
        struct vlan_hdr *vlan, _vlan;

        /* Account for double-tagging */
        if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
                vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
                if (!vlan)
                        return export_flow_keys(keys, BPF_DROP);

                if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
                        return export_flow_keys(keys, BPF_DROP);

                keys->nhoff += sizeof(*vlan);
                keys->thoff += sizeof(*vlan);
        }

        vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
        if (!vlan)
                return export_flow_keys(keys, BPF_DROP);

        keys->nhoff += sizeof(*vlan);
        keys->thoff += sizeof(*vlan);
        /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
        if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
            vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
                return export_flow_keys(keys, BPF_DROP);

        keys->n_proto = vlan->h_vlan_encapsulated_proto;
        return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}

/* License string emitted into the "license" section of the object file. */
char __license[] SEC("license") = "GPL";