root/tools/testing/selftests/bpf/progs/sockopt_sk.c
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include <bpf/bpf_helpers.h>

/* License string emitted into the "license" section of the object file. */
char _license[] SEC("license") = "GPL";

/* Page size as seen by the test. Both programs compare it against
 * ctx->optlen and against the number of optval bytes visible to BPF
 * (optval_end - optval). Stays 0 until userspace fills it in.
 */
int page_size = 0; /* userspace should set it */

/* Fall back to the protocol number when the included headers do not
 * provide SOL_TCP.
 */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket level served by the BPF programs in this file rather than by
 * the kernel: _setsockopt() stores one byte per socket for it and
 * _getsockopt() returns that byte (and rejects the call if the kernel
 * reports having handled the level itself).
 */
#define SOL_CUSTOM                      0xdeadbeef

/* Per-socket value type stored in socket_storage_map. */
struct sockopt_sk {
        /* Byte written by _setsockopt() and read back by _getsockopt()
         * for the SOL_CUSTOM level.
         */
        __u8 val;
};

/* Socket-storage map holding one struct sockopt_sk per socket.
 * Both programs in this file look entries up with ctx->sk and pass
 * BPF_SK_STORAGE_GET_F_CREATE to bpf_sk_storage_get().
 */
struct {
        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");

/* cgroup getsockopt program for the sockopt_sk selftest.
 *
 * Dispatches on (ctx->level, ctx->optname):
 *  - AF_NETLINK sockets, SOL_IP:IP_TOS, SOL_SOCKET:SO_SNDBUF and
 *    SOL_TCP:TCP_CONGESTION are passed through (see the "out" label).
 *  - SOL_TCP:TCP_ZEROCOPY_RECEIVE is passed through once the visible
 *    optval is large enough for a __u64 and its address field is zero.
 *  - SOL_IP:IP_FREEBIND is answered with the single byte 0x55.
 *  - SOL_CUSTOM is answered with the byte kept in socket_storage_map.
 *  - Anything else is rejected.
 *
 * Returns 1 to accept the call and 0 to reject it.
 */
SEC("cgroup/getsockopt")
int _getsockopt(struct bpf_sockopt *ctx)
{
        /* [optval, optval_end) is the part of the option buffer this
         * program is allowed to touch; every access below is preceded
         * by an explicit bounds check against optval_end.
         */
        __u8 *optval_end = ctx->optval_end;
        __u8 *optval = ctx->optval;
        struct sockopt_sk *storage;
        struct bpf_sock *sk;

        /* Bypass AF_NETLINK. */
        sk = ctx->sk;
        if (sk && sk->family == AF_NETLINK)
                goto out;

        /* Make sure bpf_get_netns_cookie is callable, both with a NULL
         * argument and with the program context. A zero cookie from
         * either call rejects the getsockopt.
         */
        if (bpf_get_netns_cookie(NULL) == 0)
                return 0;

        if (bpf_get_netns_cookie(ctx) == 0)
                return 0;

        if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
                /* Not interested in SOL_IP:IP_TOS;
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
                goto out;
        }

        if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
                /* Not interested in SOL_SOCKET:SO_SNDBUF;
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
                goto out;
        }

        if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
                /* Not interested in SOL_TCP:TCP_CONGESTION;
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
                goto out;
        }

        if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
                /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
                 * It has a custom implementation for performance
                 * reasons.
                 */

                /* Check that optval contains address (__u64) */
                if (optval + sizeof(__u64) > optval_end)
                        return 0; /* bounds check */

                /* The test expects the address field to still be zero
                 * at this point; anything else rejects the call.
                 */
                if (((struct tcp_zerocopy_receive *)optval)->address != 0)
                        return 0; /* unexpected data */

                goto out;
        }

        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                if (optval + 1 > optval_end)
                        return 0; /* bounds check */

                ctx->retval = 0; /* Reset system call return value to zero */

                /* Always export 0x55 */
                optval[0] = 0x55;
                ctx->optlen = 1;

                /* Userspace buffer is PAGE_SIZE * 2, but BPF
                 * program can only see the first PAGE_SIZE
                 * bytes of data.
                 *
                 * This check runs after the writes above; a mismatch
                 * still rejects the call.
                 */
                if (optval_end - optval != page_size)
                        return 0; /* unexpected data size */

                return 1;
        }

        if (ctx->level != SOL_CUSTOM)
                return 0; /* deny everything except custom level */

        /* SOL_CUSTOM: hand back the byte stored for this socket. */
        if (optval + 1 > optval_end)
                return 0; /* bounds check */

        /* Look up this socket's entry, asking for it to be created if
         * it does not exist yet (BPF_SK_STORAGE_GET_F_CREATE).
         */
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
                return 0; /* couldn't get sk storage */

        if (!ctx->retval)
                return 0; /* kernel should not have handled
                           * SOL_CUSTOM, something is wrong!
                           */
        ctx->retval = 0; /* Reset system call return value to zero */

        /* Report exactly one byte: the stored value. */
        optval[0] = storage->val;
        ctx->optlen = 1;

        return 1;

out:
        /* Pass-through path. When optlen is larger than page_size, set
         * it to 0 so that the kernel's buffer is used.
         */
        if (ctx->optlen > page_size)
                ctx->optlen = 0;
        return 1;
}

/* cgroup setsockopt program for the sockopt_sk selftest.
 *
 * Dispatches on (ctx->level, ctx->optname):
 *  - AF_NETLINK sockets are passed through (see the "out" label).
 *  - SOL_IP:IP_TOS is passed through with optlen forced to 0.
 *  - SOL_SOCKET:SO_SNDBUF has its value overwritten with 0x55AA
 *    (optlen 4).
 *  - SOL_TCP:TCP_CONGESTION has its value overwritten with the five
 *    bytes "cubic" (optlen 5, no terminating NUL copied).
 *  - SOL_IP:IP_FREEBIND must arrive with optlen == page_size * 2 and
 *    is trimmed to a single zero byte.
 *  - SOL_CUSTOM stores optval[0] in socket_storage_map and sets optlen
 *    to -1 so the kernel setsockopt handler is not called.
 *  - Anything else is rejected.
 *
 * Returns 1 to accept the call and 0 to reject it.
 */
SEC("cgroup/setsockopt")
int _setsockopt(struct bpf_sockopt *ctx)
{
        /* [optval, optval_end) is the part of the option buffer this
         * program is allowed to touch; every access below is preceded
         * by an explicit bounds check against optval_end.
         */
        __u8 *optval_end = ctx->optval_end;
        __u8 *optval = ctx->optval;
        struct sockopt_sk *storage;
        struct bpf_sock *sk;

        /* Bypass AF_NETLINK. */
        sk = ctx->sk;
        if (sk && sk->family == AF_NETLINK)
                goto out;

        /* Make sure bpf_get_netns_cookie is callable, both with a NULL
         * argument and with the program context. A zero cookie from
         * either call rejects the setsockopt.
         */
        if (bpf_get_netns_cookie(NULL) == 0)
                return 0;

        if (bpf_get_netns_cookie(ctx) == 0)
                return 0;

        if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
                /* Not interested in SOL_IP:IP_TOS;
                 * let next BPF program in the cgroup chain or kernel
                 * handle it.
                 */
                ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
                return 1;
        }

        if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
                /* Overwrite SO_SNDBUF value */

                if (optval + sizeof(__u32) > optval_end)
                        return 0; /* bounds check */

                *(__u32 *)optval = 0x55AA;
                ctx->optlen = 4;

                return 1;
        }

        if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
                /* Always use cubic */

                if (optval + 5 > optval_end)
                        return 0; /* bounds check */

                /* Five bytes only: the string's NUL is not copied and
                 * optlen is set to match.
                 */
                memcpy(optval, "cubic", 5);
                ctx->optlen = 5;

                return 1;
        }

        if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
                /* Original optlen is larger than PAGE_SIZE. */
                if (ctx->optlen != page_size * 2)
                        return 0; /* unexpected data size */

                if (optval + 1 > optval_end)
                        return 0; /* bounds check */

                /* Make sure we can trim the buffer. */
                optval[0] = 0;
                ctx->optlen = 1;

                /* Userspace buffer is PAGE_SIZE * 2, but BPF
                 * program can only see the first PAGE_SIZE
                 * bytes of data.
                 *
                 * This check runs after the writes above; a mismatch
                 * still rejects the call.
                 */
                if (optval_end - optval != page_size)
                        return 0; /* unexpected data size */

                return 1;
        }

        if (ctx->level != SOL_CUSTOM)
                return 0; /* deny everything except custom level */

        /* SOL_CUSTOM: remember the first option byte for this socket. */
        if (optval + 1 > optval_end)
                return 0; /* bounds check */

        /* Look up this socket's entry, asking for it to be created if
         * it does not exist yet (BPF_SK_STORAGE_GET_F_CREATE).
         */
        storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
                                     BPF_SK_STORAGE_GET_F_CREATE);
        if (!storage)
                return 0; /* couldn't get sk storage */

        storage->val = optval[0];
        ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
                           * setsockopt handler.
                           */

        return 1;

out:
        /* Pass-through path. When optlen is larger than page_size, set
         * it to 0 so that the kernel's buffer is used.
         */
        if (ctx->optlen > page_size)
                ctx->optlen = 0;
        return 1;
}