root/tools/testing/selftests/bpf/progs/test_sk_assign.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Cloudflare Ltd.
// Copyright (c) 2020 Isovalent, Inc.

#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_misc.h"

/* server_map: a one-entry BPF_MAP_TYPE_SOCKMAP keyed by int. The handlers in
 * this file look up key 0 to obtain the fallback socket for packets addressed
 * to port 4321. Nothing in this file writes to the map; presumably the
 * userspace side of the test inserts the server socket - confirm against the
 * test harness.
 *
 * Two declarations with the same type, key/value sizes and capacity are
 * provided; IPROUTE2_HAVE_LIBBPF selects which one is compiled.
 */
#if defined(IPROUTE2_HAVE_LIBBPF)
/* Use a new-style map definition. */
struct {
        __uint(type, BPF_MAP_TYPE_SOCKMAP);
        __type(key, int);
        __type(value, __u64);
        __uint(pinning, LIBBPF_PIN_BY_NAME);
        __uint(max_entries, 1);
} server_map SEC(".maps");
#else
/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
#define PIN_GLOBAL_NS 2

/* Must match struct bpf_elf_map layout from iproute2.
 * Field order and widths are therefore fixed; do not reorder or resize.
 */
struct {
        __u32 type;
        __u32 size_key;
        __u32 size_value;
        __u32 max_elem;
        __u32 flags;
        __u32 id;
        __u32 pinning;
} server_map SEC("maps") = {
        .type = BPF_MAP_TYPE_SOCKMAP,
        .size_key = sizeof(int),
        .size_value  = sizeof(__u64),
        /* Single slot: only key 0 is ever looked up in this file. */
        .max_elem = 1,
        .pinning = PIN_GLOBAL_NS,
};
#endif

/* Program license string, emitted into the "license" ELF section. */
char _license[] SEC("license") = "GPL";

/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
{
        void *data_end = (void *)(long)skb->data_end;
        void *data = (void *)(long)skb->data;
        struct bpf_sock_tuple *result;
        struct ethhdr *eth;
        __u8 proto = 0;
        __u64 ihl_len;

        eth = (struct ethhdr *)(data);
        if (eth + 1 > data_end)
                return NULL;

        if (eth->h_proto == bpf_htons(ETH_P_IP)) {
                struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));

                if (iph + 1 > data_end)
                        return NULL;
                if (iph->ihl != 5)
                        /* Options are not supported */
                        return NULL;
                ihl_len = iph->ihl * 4;
                proto = iph->protocol;
                *ipv4 = true;
                result = (struct bpf_sock_tuple *)&iph->saddr;
        } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
                struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));

                if (ip6h + 1 > data_end)
                        return NULL;
                ihl_len = sizeof(*ip6h);
                proto = ip6h->nexthdr;
                *ipv4 = false;
                result = (struct bpf_sock_tuple *)&ip6h->saddr;
        } else {
                return (struct bpf_sock_tuple *)data;
        }

        if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
                return NULL;

        *tcp = (proto == IPPROTO_TCP);
        __sink(ihl_len);
        return result;
}

/* Pick a socket for a UDP packet and assign it to the skb.
 *
 * @skb:   packet being classified.
 * @tuple: address/port tuple located inside the packet.
 * @ipv4:  true to use the IPv4 layout of @tuple, false for the IPv6 layout.
 *
 * A socket matching the tuple in the current network namespace is preferred.
 * If there is none, packets whose destination port is 4321 fall back to the
 * socket stored at key 0 of server_map.
 *
 * Returns TC_ACT_SHOT when the tuple does not fit inside the packet or the
 * map slot is empty, TC_ACT_OK when no socket matched and the destination
 * port is not 4321, and otherwise the return value of bpf_sk_assign().
 */
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
        void *pkt_end = (void *)(long)skb->data_end;
        size_t len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
        const int key = 0;
        struct bpf_sock *sk;
        __be16 port;
        int err;

        /* The tuple must lie entirely within the packet data. */
        if ((void *)tuple + len > pkt_end)
                return TC_ACT_SHOT;

        sk = bpf_sk_lookup_udp(skb, tuple, len, BPF_F_CURRENT_NETNS, 0);
        if (!sk) {
                /* No matching socket: only port 4321 is redirected. */
                if (ipv4)
                        port = tuple->ipv4.dport;
                else
                        port = tuple->ipv6.dport;
                if (port != bpf_htons(4321))
                        return TC_ACT_OK;

                sk = bpf_map_lookup_elem(&server_map, &key);
                if (!sk)
                        return TC_ACT_SHOT;
        }

        /* Whichever lookup produced sk, drop the reference after assigning. */
        err = bpf_sk_assign(skb, sk, 0);
        bpf_sk_release(sk);
        return err;
}

/* Pick a socket for a TCP packet and assign it to the skb.
 *
 * @skb:   packet being classified.
 * @tuple: address/port tuple located inside the packet.
 * @ipv4:  true to use the IPv4 layout of @tuple, false for the IPv6 layout.
 *
 * A socket matching the tuple that is not in the LISTEN state is assigned
 * directly. Otherwise, packets whose destination port is 4321 are steered to
 * the socket stored at key 0 of server_map, provided that socket is in the
 * LISTEN state.
 *
 * Returns TC_ACT_SHOT when the tuple does not fit inside the packet, the map
 * slot is empty, or the map socket is not listening; TC_ACT_OK when no
 * non-listening socket matched and the destination port is not 4321;
 * otherwise the return value of bpf_sk_assign().
 */
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
        struct bpf_sock *sk;
        const int zero = 0;
        size_t tuple_len;
        __be16 dport;
        int ret;

        /* The tuple must lie entirely within the packet data. */
        tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
        if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
                return TC_ACT_SHOT;

        sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
        if (sk) {
                /* A match that is not a listener is used as-is. */
                if (sk->state != BPF_TCP_LISTEN)
                        goto assign;
                /* A listener match is dropped in favour of the map socket
                 * lookup below; release the reference before moving on.
                 */
                bpf_sk_release(sk);
        }

        /* Only traffic addressed to port 4321 is redirected. */
        dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
        if (dport != bpf_htons(4321))
                return TC_ACT_OK;

        sk = bpf_map_lookup_elem(&server_map, &zero);
        if (!sk)
                return TC_ACT_SHOT;

        /* The map socket must be a listener; release it on rejection.
         * NOTE(review): sk here comes from a different helper than in the
         * check above. Presumably the verifier tracks the two pointers with
         * different types, so keep the two state checks as separate
         * statements - confirm before refactoring.
         */
        if (sk->state != BPF_TCP_LISTEN) {
                bpf_sk_release(sk);
                return TC_ACT_SHOT;
        }

assign:
        /* Both paths reach here holding sk; release it after assignment. */
        ret = bpf_sk_assign(skb, sk, 0);
        bpf_sk_release(sk);
        return ret;
}

/* TC program entry point.
 *
 * Locates the socket tuple in the packet and dispatches to the TCP or UDP
 * handler. Returns TC_ACT_SHOT when no tuple could be located or the handler
 * returned a non-zero value, and TC_ACT_OK when the handler returned zero.
 */
SEC("tc")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
        bool is_ipv4 = false, is_tcp = false;
        struct bpf_sock_tuple *tuple;
        int err = 0;

        tuple = get_tuple(skb, &is_ipv4, &is_tcp);
        if (tuple == NULL)
                return TC_ACT_SHOT;

        /* TCP and UDP are handled by two separate functions on purpose:
         * bpf_skc_lookup_tcp() and bpf_sk_lookup_udp() return sockets that
         * the verifier treats as different pointer types even though the
         * C-level type is identical, so a shared implementation would cross
         * pointer types and fail verification.
         */
        err = is_tcp ? handle_tcp(skb, tuple, is_ipv4)
                     : handle_udp(skb, tuple, is_ipv4);
        if (err != 0)
                return TC_ACT_SHOT;

        return TC_ACT_OK;
}